diff --git a/_clang-format b/.clang-format similarity index 93% rename from _clang-format rename to .clang-format index d1af2d18..afb5fa06 100644 --- a/_clang-format +++ b/.clang-format @@ -7,7 +7,7 @@ AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlines: Left AlignOperands: true -AlignTrailingComments: true +AlignTrailingComments: false AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true @@ -130,6 +130,8 @@ RawStringFormats: ReflowComments: true SortIncludes: true SortUsingDeclarations: true + +# spaces SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true @@ -140,12 +142,19 @@ SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 +SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false + +# https://stackoverflow.com/questions/67396557/adding-space-after-in-clang-format +# this is to allow both // and //space comments to be unmangled +SpacesInLineCommentPrefix: + Minimum: 0 + Maximum: 1 + Standard: Auto StatementMacros: - Q_UNUSED diff --git a/.clang-format-ignore b/.clang-format-ignore new file mode 100644 index 00000000..cf89029a --- /dev/null +++ b/.clang-format-ignore @@ -0,0 +1,37 @@ +#comments look like this, so these dirs are processed + +gltf/* + +#hlslparser/* + +#kram-preview/* +#kram-profile/* +#kram-profile/CBA/* +#kram-shader/* +#kram-thumb/* +#kram-thumb-win/* +#kramc +#kramv + +libkram/allocate/* +libkram/astc-encoder/* +libkram/bc7enc/* +libkram/cgltf/* +libkram/compressonator/* +libkram/eastl/* +#libkram/etc2comp/* +libkram/fastl/* +libkram/fmt/* +libkram/heman/* +libkram/json11/* +#libkram/kram/* +libkram/lodepng/* +libkram/miniz/* +libkram/simdjson/* 
+libkram/squish/* +libkram/tmpfileplus/* +libkram/transcoder/* +#libkram/vectormath/* +libkram/zstd/* + +plugin/* diff --git a/.gitattributes b/.gitattributes index 1ff03220..86ca36cb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,54 @@ +# This controls the line-endings on various file types. +# Most win editors can cope with lf ending files, +# and use binary load to avoid cstdlib replacement. + +# txt/sh/py scripts need to be lf to run across wsl/macOS +*.sh text eol=lf +*.py text eol=lf +*.txt text eol=lf + +*.json text eol=lf +*.plist text eol=lf +*.xcconfig text eol=lf + +*.md text eol=lf +LICENSE text eol=lf +meson.build text eol=lf +*.vcproj text eol=crlf + +# what about .cpp/.h files? + +#------------- +# commit various binary file types to git-lfs +# see here https://rehansaeed.com/gitattributes-best-practices/ +# -text means it's not a text file and is binary + +# Archives +*.7z filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text + +*.ico filter=lfs diff=lfs merge=lfs -text +*.jpg filter=lfs diff=lfs merge=lfs -text *.png filter=lfs diff=lfs merge=lfs -text *.ktx filter=lfs diff=lfs merge=lfs -text *.ktx2 filter=lfs diff=lfs merge=lfs -text *.dds filter=lfs diff=lfs merge=lfs -text +*.psd filter=lfs diff=lfs merge=lfs -text + +# Documents +*.pdf filter=lfs diff=lfs merge=lfs -text + +# Models +*.obj filter=lfs diff=lfs merge=lfs +*.gltf filter=lfs diff=lfs merge=lfs +*.glb filter=lfs diff=lfs merge=lfs -text +*.fbx filter=lfs diff=lfs merge=lfs -text +*.usda filter=lfs diff=lfs merge=lfs -text +*.usdc filter=lfs diff=lfs merge=lfs -text +*.usdz filter=lfs diff=lfs merge=lfs -text +*.rkassets filter=lfs diff=lfs merge=lfs -text + +# Other +*.exe filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/pre-release.yml b/.github/workflows/pre-release.yml index 79f9c673..ebb5ef13 100644 --- a/.github/workflows/pre-release.yml 
+++ b/.github/workflows/pre-release.yml @@ -13,14 +13,16 @@ jobs: strategy: matrix: #os: [ubuntu-latest, macos-latest, windows-latest] - os: [macos-latest, windows-2019] + #os: [macos-latest, windows-latest] + #os: [macos-15, windows-latest] + os: [ubuntu-latest, macos-15, windows-latest] steps: - name: Update CMake uses: lukka/get-cmake@latest - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Build and install to bin/ run: ./scripts/cibuild.sh ${{ matrix.os }} diff --git a/.github/workflows/tagged-release.yml b/.github/workflows/tagged-release.yml index 231bcaba..bb9c3414 100644 --- a/.github/workflows/tagged-release.yml +++ b/.github/workflows/tagged-release.yml @@ -12,14 +12,16 @@ jobs: strategy: matrix: #os: [ubuntu-latest, macos-latest, windows-latest] - os: [macos-latest, windows-2019] + #os: [macos-latest, windows-latest] + #os: [macos-15, windows-latest] + os: [ubuntu-latest, macos-15, windows-latest] steps: - name: Update CMake uses: lukka/get-cmake@latest - name: Check out code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Build and install to bin/ run: ./scripts/cibuild.sh ${{ matrix.os }} @@ -41,4 +43,4 @@ jobs: file: bin/*.zip file_glob: true tag: ${{ github.ref }} - overwrite: true \ No newline at end of file + overwrite: true diff --git a/.gitignore b/.gitignore index d4740c2b..6dc47462 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,8 @@ libkram/kram/KramVersion.h *.obj # Precompiled Headers -*.gch -*.pch +# *.gch +# *.pch # Compiled Dynamic libraries *.so diff --git a/CMakeLists.txt b/CMakeLists.txt index 064119e7..7ec939ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,46 +1,26 @@ -# for now don't require high cmake for TravisCI builds on Win, but need 3.19.2 for universal app builds -if (APPLE) - cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) -else() - cmake_minimum_required(VERSION 3.18.0 FATAL_ERROR) -endif() - +cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) + 
#----------------------------------------------------- -set(BUILD_IOS FALSE) +# really not using cmake for mac, but this was used in the past so leaving it in +# still building Win using Cmake. macOS uses avx2 and Win uses avx2. set(BUILD_MAC FALSE) set(BUILD_WIN FALSE) -set(BUILD_UNIX FALSE) +set(BUILD_LINUX FALSE) if (APPLE) - if (CMAKE_SYSTEM_NAME STREQUAL "iOS") - message("build for iOS") - set(BUILD_IOS TRUE) - else() - message("build for macOS") - set(BUILD_MAC TRUE) - endif() + message(STATUS "build for macOS") + set(BUILD_MAC TRUE) elseif (WIN32) - message("build for win x64") + message(STATUS "build for win x64") set(BUILD_WIN TRUE) -elseif (UNIX AND NOT APPLE) - message("build for unix") - set(BUILD_UNIX TRUE) +elseif (LINUX) + message(STATUS "build for linux x64") + set(BUILD_LINUX TRUE) endif() #----------------------------------------------------- -# SYSROOT must be set before project, -# SYSROOT is max OS, deployment is min OS on Apple. -# If SYSROOT not set, then cmake uses min OS from deployment target. Ugh. -# so have to force SYSROOT to latest SDK. -# Want to set 11.0 here, but Xcode 12.3 ships with 11.1, etc. -# So then cmake breaks when it cannot find the C compiler, etc. -# Setting macosx to take the latest greatest sdk. - -# don't change these to set_property(GLOBAL) or set_target_properties, the need to be set prior to project -# and only seem to work if set() is used to force the global value. - # suppress ZERO_CHECK project set(CMAKE_SUPPRESS_REGENERATION true) @@ -48,143 +28,128 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED YES) set(CMAKE_CXX_EXTENSIONS NO) -# Xcode 12.2 ships with macosx11.0, but 12.3 ships with macosx11.1 -# cmake on 12.3 completely breaks when this is set and can't find c compilers. 
-# set(CMAKE_OSX_SYSROOT macosx11.0) -# set(CMAKE_OSX_SYSROOT macos) # this doesn't work - -# CMAKE_OSX_DEPLOYMENT_TARGET must be set as a CACHE variable, or it will be stripped -if (BUILD_IOS) - set(CMAKE_OSX_DEPLOYMENT_TARGET "14.1" CACHE STRING "Minimum iOS") - set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Architecture iOS") -elseif (BUILD_MAC) - set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum macOS") - set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture macOS") -endif() - set(CMAKE_CONFIGURATION_TYPES "Debug;Release") set(CMAKE_BUILD_TYPE Release) - -if (BUILD_MAC) - set(CMAKE_DEFAULT_STARTUP_PROJECT "kramc") -elseif (BUILD_IOS) - set(CMAKE_DEFAULT_STARTUP_PROJECT "kramc") -elseif (BUILD_WIN) - set(CMAKE_DEFAULT_STARTUP_PROJECT "kramc") -endif() - +set(CMAKE_DEFAULT_STARTUP_PROJECT "kramc") #----------------------------------------------------- +if (BUILD_LINUX) + set(myTargetWorkspace kramWorkspace) -# cmake translates project to sln in Win, but to xcode projects on Mac. -# No way to make xcode workspaces, but could do manually. 
-set(myTargetWorkspace kramWorkspace) + # don't want gcc, want clang + SET (CMAKE_CXX_COMPILER "/usr/bin/clang++" CACHE STRING "C++ compiler" FORCE) + SET (CMAKE_C_COMPILER "/usr/bin/clang" CACHE STRING "C compiler" FORCE) -if (BUILD_MAC OR BUILD_IOS) - project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) -elseif (BUILD_WIN) project(${myTargetWorkspace} LANGUAGES C CXX) -endif() -# the kram static library libkram which should build on iOS/Android/Mac/Win -# this doesn't set a project, but maybe it should -add_subdirectory(libkram) + # want to only use clang across all platforms + message(STATUS "Using ${CMAKE_CXX_COMPILER_ID} compiler") + + # the kram static library libkram which should build on iOS/Android/Mac/Win + # this doesn't set a project, but maybe it should + add_subdirectory(libkram) -# the CLI app for Mac/Win that can build content for other platforms, uses libkram -add_subdirectory(kramc) + # the CLI app for Mac/Win that can build content for other platforms, uses libkram + add_subdirectory(kramc) -# the viewer is only written for macOS Intel/ARM currently, uses libkram -if (BUILD_MAC) - add_subdirectory(kramv) -endif() + set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) -# ps plugin that uses libkram -if (BUILD_MAC) - add_subdirectory(plugin) + # need app/libs to be in bin directory to zip archive + install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) + install(TARGETS kram RUNTIME DESTINATION ${BIN_DIR}) + endif() - + #----------------------------------------------------- +if (BUILD_WIN) + # cmake translates project to sln in Win, but to xcode projects on Mac. + # No way to make xcode workspaces, but could do manually. 
+ set(myTargetWorkspace kramWorkspace) -# https://discourse.cmake.org/t/specifying-cmake-osx-sysroot-breaks-xcode-projects-but-no-other-choice/2532/8 -# use snipet from Alian Martin to validate SDK - -if (BUILD_MAC OR BUILD_IOS) - if(NOT DEFINED CMAKE_OSX_SYSROOT) - message(FATAL_ERROR "Cannot check SDK version if CMAKE_OSX_SYSROOT is not defined." - ) - endif() - - # check the Xcode app itself for it's version - set(XCODE_MIN_APP 12.2) - if(XCODE AND XCODE_VERSION VERSION_LESS XCODE_MIN_APP) - message(FATAL_ERROR "This project requires at least Xcode ${XCODE_MIN_APP}") - endif() - - # check the SDK - set(XCODE_MIN_SDK_IOS 14.1) - set(XCODE_MIN_SDK_MACOS 11.0) - - execute_process( - COMMAND xcrun --sdk "${CMAKE_OSX_SYSROOT}" --show-sdk-version - OUTPUT_VARIABLE SDK_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - - if (BUILD_IOS) - message("iOS SDK ${SDK_VERSION}") - message("iOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") - message("iOS arch ${CMAKE_OSX_ARCHITECTURES}") - - if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_IOS) - message(FATAL_ERROR "This project requires at least iPhoneOS ${XCODE_MIN_SDK_IOS}" - ) - endif() - - elseif (BUILD_MAC) - message("macOS SDK ${SDK_VERSION}") - message("macOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") - message("macOS arch ${CMAKE_OSX_ARCHITECTURES}") - - if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_MACOS) - message(FATAL_ERROR "This project requires at least macOS SDK ${XCODE_MIN_SDK_MACOS}" - ) - endif() - endif() -endif() + project(${myTargetWorkspace} LANGUAGES C CXX) + + # want to only use clang across all platforms + message(STATUS "Using ${CMAKE_CXX_COMPILER_ID} compiler") + + #----------------------------------------------------- -#----------------------------------------------------- + # the kram static library libkram which should build on iOS/Android/Mac/Win + # this doesn't set a project, but maybe it should + add_subdirectory(libkram) -# was considering platform-specific builds, but mac/win don't conflict -set(BIN_DIR 
${PROJECT_SOURCE_DIR}/bin) + # the CLI app for Mac/Win that can build content for other platforms, uses libkram + add_subdirectory(kramc) + + # TODO: this needs a shared libkram, but kramc uses static libkram + # this is an Explorer thumbnail extension (run script to un/register), uses libkram + # add_subdirectory(kram-thumb-win) -#if (BUILD_IOS) -# set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin/ios) -#elseif (BUILD_MAC) -# set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin/mac) -#elseif (BUILD_WIN) -# set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin/win) -#endif() - -# So by default install depends on ALL_BUILD target, but that will fail if plugin -# does not have everything setup to build (or like now is not building). -# The plugin is currently setting EXCLUDE_FROM_ALL on the target so it's not built. -# https://stackoverflow.com/questions/17164731/installing-only-one-target-and-its-dependencies-out-of-a-complex-project-with - -# install doesn't seem to do anything on WIN32, the build elements are not copied -install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) - -if (BUILD_MAC OR BUILD_WIN) + # hack hlslparser for win build into kram for now, does not use kram + # add_subdirectory(hlslparser) + + #----------------------------------------------------- + + set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) + + # need app/libs to be in bin directory to zip archive + install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) install(TARGETS kram RUNTIME DESTINATION ${BIN_DIR}) + #install(TARGETS kram-thumb-win LIBRARY DESTINATION ${BIN_DIR}) + + # hlslparser is also now in the kram build. Keep executables up to date. + # I would use the sln file, but msbuild doesn't like to be called from cibuild.sh + # This builds but has a lot of warnings. When I resume work, will re-instate. + # install(TARGETS hlslparser RUNTIME DESTINATION ${BIN_DIR}) endif() +#----------------------------------------------------- +# This part is unmaintained. Couldn't build app extensions via CMake. 
+# So now just maintain projects + if (BUILD_MAC) + # cmake translates project to sln in Win, but to xcode projects on Mac. + # No way to make xcode workspaces, but could do manually. + set(myTargetWorkspace kramWorkspace) + + project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) + + # CMAKE_OSX_DEPLOYMENT_TARGET must be set as a CACHE variable, or it will be stripped + set(CMAKE_OSX_DEPLOYMENT_TARGET "13.0" CACHE STRING "Minimum macOS") + set(CMAKE_OSX_ARCHITECTURES "arm64" CACHE STRING "Architecture macOS") + + #----------------------------------------------------- + + # the kram static library libkram which should build on iOS/Android/Mac/Win + # this doesn't set a project, but maybe it should + add_subdirectory(libkram) + + # the CLI app for Mac/Win that can build content for other platforms, uses libkram + add_subdirectory(kramc) + + # the viewer is only written for macOS Intel/ARM currently, uses libkram + add_subdirectory(kramv) + + # ps plugin that uses libkram + add_subdirectory(plugin) + + # hlslparser needs some more work to modernize to a C++ style HLSL syntax + add_subdirectory(hlslparser) + + #----------------------------------------------------- + + # was considering platform-specific builds, but mac/win don't conflict + set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) + + install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) + install(TARGETS kram RUNTIME DESTINATION ${BIN_DIR}) install(TARGETS kramv BUNDLE DESTINATION ${BIN_DIR}) -endif() + install(TARGETS hlslparser RUNTIME DESTINATION ${BIN_DIR}) -# don't install this yet -#if (BUILD_MAC) + # photoshop plugin # install(TARGETS kram-ps BUNDLE DESTINATION ${BIN_DIR}) -#endif() + +endif() + diff --git a/README.md b/README.md index 95daaa31..b08acec0 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,35 @@ -# kram, kram.exe -C++11 main to libkram to create CLI tool. Encode/decode/info on PNG/KTX/KTX2/DDS files with LDR/HDR and BC/ASTC/ETC2. Runs on macOS/win. +The suite of kram tools below. 
I hope these improve your game, app, and art development. + +# kram-profile +Display profile traces (e.g. Perfetto) quickly in an application for optimizing memory, builds, and cpu/gpu timings +https://github.com/alecazam/kram/tree/main/kram-profile + +# hlslparser +Parses HLSL syntax and generates readable HLSL/MSL code without transpiling. DXC is then used to compile to spirv. +https://github.com/alecazam/kram/tree/main/hlslparser + +# vectormath +Fast vector math based around clang vector extensions. Requires clang but accelerated for arm64/neon, x64/avx2+fma+f16c. +https://github.com/alecazam/kram/tree/main/libkram/vectormath # libkram.a, libkram-ios.a, kram.lib -C++11 library from 200 to 800KB in size depending on encoder options. Compiles for iOS (ARM), macOS (ARM/Intel), win (Intel). +C++11 library from 200 to 800KB in size depending on encoder options. Compiles for iOS/macOS (arm64), win/linux (x64). + +# kram, kram.exe +C++11 main to libkram to create CLI tool. Encode/decode/info on PNG/KTX/KTX2/DDS files with LDR/HDR and BC/ASTC/ETC2. Runs on macOS(arm64), win/linux(x64). + +# kram-thumb-win.dll +Windows thumbnailer for DDS/KTX/KTX2. Go to build or bin folder. Install with "regsvr32.exe kram-thumb-win.dll". Uninstall with "regsvr32.exe /u kram-thumb-win.dll" + +https://github.com/alecazam/kram/tree/main/kram-thumb-win # kramv.app -ObjC++ Viewer for PNG/KTX/KTX2/DDS supported files from kram. 530KB in size. Uses Metal compute and shaders, eyedropper, grids, debugging, preview. Supports HDR and all texture types. Mip, face, and array access. No dmg yet, just drop onto /Applications folder. Runs on macOS (ARM/Intel). +ObjC++ viewer for PNG/KTX/KTX2/DDS supported files from kram. Uses Metal compute and shaders, eyedropper, grids, debugging, preview. Supports HDR and all texture types. Mip, face, volume, and array access. No dmg yet, just drop onto /Applications folder. Runs on macOS (arm64). 
Generates Finder thumbnails and QuickLook previews via modern macOS app extension mechanisms. Diagrams and screenshots can be located here: -https://www.figma.com/file/bPmPSpBGTi2xTVnBDqVEq0/kram +https://www.figma.com/design/bPmPSpBGTi2xTVnBDqVEq0/kram?node-id=0-1&t=OnP0wHcDOmg7b7Vg-1 -#### Releases includes builds for macOS (Xcode 12.3 - arm64/x64) and Windows x64 (VS 2019 - x64). kramv for macOS, kram for macOS/Win, libkram for macOS/iOS/Win. Android library via NDK is possible, but f16 support is spotty on devices. +#### Releases includes builds for macOS (Xcode 15.3 - arm64/x64/clang) and Windows x64 (VS 2022 - x64/clang) and Linux (ubuntu-x64/clang). kramv for macOS, kram for macOS/Win/Linux, libkram for macOS/iOS/Win/Linux, win-thumb-kram for Win. Android library via NDK is possible, but f16 support is spotty on devices. ### About kram kram is a wrapper to several popular encoders. Most encoders have sources, and have been optimized to use very little memory and generate high quality encodings at all settings. All kram encoders are currently CPU-based. Some of these encoders use SSE, and a SSE to Neon layer translates those. kram was built to be small and used as a library or app. It's also designed for mobile and desktop use. The final size with all encoders is under 1MB, and disabling each encoder chops off around 200KB down to a final 200KB app size via dead-code stripping. The code should compile with C++11 or higher. @@ -22,7 +41,7 @@ Many of the encoder sources can multithread a single image, but that is unused. Similar to a makefile system, the script sample kramtexture.py uses modstamps to skip textures that have already been processed. If the source png/ktx/ktx2 is older than the output, then the file is skipped. Command line options are not yet compared, so if those change then use --force on the python script to rebuild all textures. 
Also a crc/hash could be used instead when modstamp isn't sufficient or the same data could come from different folders. ### About kramv -kramv is a viewer for the BC/ASTC/ETC2 LDR/HDR KTX/KTX2/DDS textures generated by kram from LDR PNG and LDR/HDR KTX/KTX2/DDS sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. +kramv is a viewer for the BC/ASTC/ETC2 LDR/HDR KTX/KTX2/DDS textures generated by kram from LDR PNG and LDR/HDR KTX/KTX2/DDS sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. I have macOS Intel support disabled as of 1/25, but it can be re-enabled in the xcode project. kramv uses ObjC++ with the intent to port to Windows C++ as time permits. Uses menus, buttons, and keyboard handling useful for texture triage and analysis. Drag and drop folders, bundles, and click-to-launch are supported. Recently used textures/folders/bundles are listed in the menu. The app currently shows a single document at a time. Subsequent opens reuse the same document Window. With bundles and folders, kramv will attempt to pair albedo and normal maps together by filename for the preview. @@ -158,7 +177,7 @@ open build2/kram.xcworkspace ``` -kram was using CMake to setup the projects and build. kramv.app, kram, and libkram are generated, but kramv.app and kram are stand-alone. The library can be useful in apps that want to include the decoder, or runtime compression of gpu-generated data. +kram was using CMake to setup the projects and build, but it doesn't support workspaces, clean, or the thumbnail/preview extension linking. As a result, kramv.app, kram, and libkram are generated. So I'm building kramv.app and everything with a custom Xcode project now. 
The library can be useful in apps that want to include the decoder, or runtime compression of gpu-generated data. For Mac, the CMake build is out-of-source, and can be built from the command line, or debugged from the xcodeproj that is built. Ninja and Makefiles can also be generated from cmake, but remember to trash the CMakeCache.txt file. @@ -173,13 +192,13 @@ or cmake --install ../bin --config Release ``` -For Windows, CMake is still used. I tried to fix CMake to build the library into the app directory so the app is updated. "Rebuild Solution" if your changes don't take effect, or if breakpoints stop being hit. +For Windows, CMake is still used. CMake builds libkram, kramc, and kram-thumb-win.dll. This uses the clang compiler and x64 only. ``` mkdir build -cmake .. -G "Visual Studio 15 2017 Win64" +cmake .. -G "Visual Studio 16 2019" -T ClangCL -A x64 or -cmake .. -G "Visual Studio 16 2019" -A x64 +cmake .. -G "Visual Studio 17 2022" -T ClangCL -A x64 cmake --build . --config Release or @@ -222,7 +241,7 @@ Win python3.exe -m pip install -U pip python3.exe -m pip install -r ../scripts/requirements.txt -# this uses 8 processes, and bundles the results to a zip file +# Gen ktx using 8 processes, and bundles the results to a zip file ../scripts/kramTextures.py --jobs 8 -p android --bundle # this writes out a script of all commands and runs on threads in a single process @@ -230,13 +249,15 @@ python3.exe -m pip install -r ../scripts/requirements.txt ../scripts/kramTextures.py --jobs 8 -p mac --script --force ../scripts/kramTextures.py --jobs 8 -p win --script --force -# To move towards supercompressed ktx2 files, the following flags convert ktx output to ktx2 +# Generate ktx2 output, and then bundle them into a zip +../scripts/kramTextures.py -p any -c ktx2 --bundle +../scripts/kramTextures.py -p android -c ktx2 --bundle --check -# if ktxsc and ktx2ktx2 are present in the path, then these scripts generate ktx2 output, and then bundle them into a zip 
-../scripts/kramTextures.py -p any --ktx2 --bundle -../scripts/kramTextures.py -p android --ktx2 --bundle --check +# Generate dds output, and then bundle them into a zip +../scripts/kramTextures.py -p any -c dds --bundle +../scripts/kramTextures.py -p android -c dds --bundle --check -# if ktxsc and ktx2ktx2 are present in the path, this runs kramTextures across all platforms, requires ktx2ktx2 +# Generate textures for all platforms ../scripts/kramTests.sh ``` @@ -324,6 +345,12 @@ kram includes additional open-source: | miniz | Rich Gelreich | Unlicense | bundle support via zip | | gltfKit | Warren Moore | MIT | gltf decoder/renderer | +kram-thumb-win.dll additional open-source + +| Library | Author | License | Purpose | +|----------------|--------------------|-----------|---------------------------| +| QOI thumbnails | iOrange | MIT | win thumbnailer | + #### Open source changes * lodepng - altered header paths. @@ -613,7 +640,7 @@ kram encourages the use of lossless and hdr source data. There are not many cho KTX is a well-designed format, and KTX2 continues that tradition. It was also faily easy to convert between these formats. Once mips are decoded, KTX2 looks very much like KTX. -Visually validating and previewing the results is complicated. KTX/2 have few viewers, hence the need for kramv. Apple's Preview can open BC and ASTC files on macOS, but not ETC/PVRTC. And then you can't look at channels or mips, or turn on/off premultiplied alpha, or view signed/unsigned data. Preview premultiplies PNG images, but KTX files aren't. Apple's thumbnails don't work for ETC2 or PVRTC data in KTX files. Windows thumbnails don't work for KTX at all. PVRTexToolGUI 2020R2 applies sRGB incorrectly to images, and can't open BC4/5/7 files on Mac. +Visually validating and previewing the results is complicated. KTX/2 have few viewers, hence the need for kramv. Apple's Preview can open BC/ASTC files on macOS without mips, but not ETC/PVRTC. It quarantines files opened. 
And then you can't look at channels or mips, or turn on/off premultiplied alpha, or view signed/unsigned data. Preview premultiplies PNG images, but KTX files aren't. Apple's thumbnails don't work for ETC2 or PVRTC data in KTX files. Windows thumbnails don't work for KTX at all. PVRTexToolGUI 2020R2 applies sRGB incorrectly to images, and can't open BC4/5/7 files on Mac. kram adds props to KTX/2 file to store data. Currently props store Metal and Vulkan formats. This is important since GL's ASTC LDR and HDR formats are the same constant. Also props are saved for channel content and post-swizzle. Loaders, viewers, and shaders can utilize this metadata. diff --git a/build2/kram.xcconfig b/build2/kram.xcconfig new file mode 100644 index 00000000..c63effb2 --- /dev/null +++ b/build2/kram.xcconfig @@ -0,0 +1,35 @@ +// kram - Copyright 2020-2025 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +// Xcode's AVX2 simd setting doesn't set -mfma or -mf16c. So +// then universal builds throw hundreds of warnings. Ugh. +// Xcode doesn't set NDEBUG=1 in Release builds. Ugh. +// Also turn on -ftime-trace to review build times in kram-profile. + +// This setting only applies to x64, but many macs only have AVX (2019 MBP). +// Note that f16c is supported on AVX, but fma requires AVX2. +// If setting this to AVX, then set GLTF/GLTFMTL too since those don't use config. +// There isn't currently a vectormath fallback to avx (see SIMD_AVX2). 
+CLANG_X86_VECTOR_INSTRUCTIONS = avx2 + +KRAM_FLAGS_X64 = +KRAM_FLAGS_X64[sdk=*][arch=x86_64] = -mf16c -mfma + +KRAM_FLAGS_RELEASE = +KRAM_FLAGS_RELEASE[sdk=*][config=Release] = -DNDEBUG=1 + +KRAM_FLAGS = -ftime-trace +KRAM_FLAGS = $(KRAM_FLAGS) -DUSE_SIMDLIB=1 -DUSE_SIMDLIBMODULE=1 +KRAM_FLAGS = $(KRAM_FLAGS) -fmodules -fcxx-modules + +// KRAM_FLAGS += -DCOMPILE_EASTL=1 +// TODO: also set include path for eastl + +// configuring all the encoders in kram +// KRAM_FLAGS += -DCOMPILE_ASTCENC=1 -DCOMPILE_ATE=1 -DCOMPILE_ETCENC=1 -DCOMPILE_SQUISH=1 -DCOMPILE_BCENC=1 -DCOMPILE_COMP=1 -DCOMPILE_BASIS=0 -DCOMPILE_EASTL=0 + +// This is killing build times in Xcode16 +ENABLE_MODULE_VERIFIER = NO + +OTHER_CFLAGS = $(inherited) $(KRAM_FLAGS) $(KRAM_FLAGS_RELEASE) $(KRAM_FLAGS_X64) diff --git a/build2/kram.xcodeproj/project.pbxproj b/build2/kram.xcodeproj/project.pbxproj index 856e7154..26ee08f5 100644 --- a/build2/kram.xcodeproj/project.pbxproj +++ b/build2/kram.xcodeproj/project.pbxproj @@ -3,10 +3,13 @@ archiveVersion = 1; classes = { }; - objectVersion = 50; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ + 702E0DB62CA10BC100B652B7 /* astcenc_mathlib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB327DDDBCC00D0B9E1 /* astcenc_mathlib.cpp */; }; + 706178192DE16211001545E1 /* KramFileIO.h in Headers */ = {isa = PBXBuildFile; fileRef = 706178172DE16211001545E1 /* KramFileIO.h */; }; + 7061781A2DE16211001545E1 /* KramFileIO.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706178182DE16211001545E1 /* KramFileIO.cpp */; }; 706EEF7F26D1595D001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAA26D1583E001C950E /* EtcBlock4x4Encoding_RGB8.cpp */; }; 706EEF8026D1595D001C950E /* EtcImage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAC26D1583E001C950E /* EtcImage.cpp */; }; 706EEF8126D1595D001C950E /* EtcDifferentialTrys.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAF26D1583E001C950E /* 
EtcDifferentialTrys.cpp */; }; @@ -19,8 +22,6 @@ 706EEF8926D1595D001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBF26D1583E001C950E /* EtcBlock4x4Encoding_ETC1.cpp */; }; 706EEF8A26D1595D001C950E /* EtcBlock4x4Encoding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC526D1583E001C950E /* EtcBlock4x4Encoding.cpp */; }; 706EEF8B26D1595D001C950E /* EtcBlock4x4.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */; }; - 706EEF8C26D1595D001C950E /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDCE26D1583E001C950E /* bc7decomp.cpp */; }; - 706EEF8D26D1595D001C950E /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDD026D1583E001C950E /* bc7enc.cpp */; }; 706EEFA726D1595D001C950E /* basisu_transcoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE0426D1583F001C950E /* basisu_transcoder.cpp */; }; 706EEFA826D1595D001C950E /* miniz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1126D1583F001C950E /* miniz.cpp */; }; 706EEFA926D1595D001C950E /* hedistance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1426D1583F001C950E /* hedistance.cpp */; }; @@ -35,7 +36,6 @@ 706EEFB226D1595D001C950E /* KramLog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2826D1583F001C950E /* KramLog.cpp */; }; 706EEFB326D1595D001C950E /* KramSDFMipper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2B26D1583F001C950E /* KramSDFMipper.cpp */; }; 706EEFB426D1595D001C950E /* KramMmapHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2C26D1583F001C950E /* KramMmapHelper.cpp */; }; - 706EEFB526D1595D001C950E /* float4a.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2F26D1583F001C950E /* float4a.cpp */; }; 706EEFB626D1595D001C950E /* Kram.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE3526D1583F001C950E /* Kram.cpp */; }; 706EEFB726D1595D001C950E /* squish.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 706EEE3D26D1583F001C950E /* squish.cpp */; }; 706EEFB826D1595D001C950E /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE3E26D1583F001C950E /* colourset.cpp */; }; @@ -68,10 +68,6 @@ 706EEFE026D15984001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC426D1583E001C950E /* EtcBlock4x4Encoding_RGBA8.h */; }; 706EEFE126D15984001C950E /* EtcColorFloatRGBA.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */; }; 706EEFE226D15984001C950E /* EtcBlock4x4Encoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC826D1583E001C950E /* EtcBlock4x4Encoding.h */; }; - 706EEFE326D15984001C950E /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCB26D1583E001C950E /* rgbcx.h */; }; - 706EEFE426D15984001C950E /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCC26D1583E001C950E /* bc7enc.h */; }; - 706EEFE526D15984001C950E /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCD26D1583E001C950E /* bc7decomp.h */; }; - 706EEFE626D15984001C950E /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDD126D1583E001C950E /* rgbcx_table4.h */; }; 706EEFF226D15984001C950E /* ateencoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFA26D1583E001C950E /* ateencoder.h */; }; 706EEFF326D15984001C950E /* basisu_transcoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFC26D1583E001C950E /* basisu_transcoder.h */; }; 706EEFF426D15984001C950E /* basisu_containers.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFD26D1583E001C950E /* basisu_containers.h */; }; @@ -87,7 +83,6 @@ 706EEFFE26D15985001C950E /* stb_rect_pack.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1726D1583F001C950E /* stb_rect_pack.h */; }; 706EEFFF26D15985001C950E /* KramZipHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1926D1583F001C950E /* KramZipHelper.h */; }; 706EF00026D15985001C950E /* 
KramSDFMipper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2026D1583F001C950E /* KramSDFMipper.h */; }; - 706EF00126D15985001C950E /* sse2neon.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2226D1583F001C950E /* sse2neon.h */; }; 706EF00226D15985001C950E /* KramConfig.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2326D1583F001C950E /* KramConfig.h */; }; 706EF00326D15985001C950E /* KramLog.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2426D1583F001C950E /* KramLog.h */; }; 706EF00426D15985001C950E /* KramLib.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2726D1583F001C950E /* KramLib.h */; }; @@ -99,7 +94,6 @@ 706EF00A26D15985001C950E /* KramImageInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3126D1583F001C950E /* KramImageInfo.h */; }; 706EF00B26D15985001C950E /* KramTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3226D1583F001C950E /* KramTimer.h */; }; 706EF00C26D15985001C950E /* KramMmapHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3326D1583F001C950E /* KramMmapHelper.h */; }; - 706EF00D26D15985001C950E /* float4a.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3426D1583F001C950E /* float4a.h */; }; 706EF00E26D15985001C950E /* KramFileHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3626D1583F001C950E /* KramFileHelper.h */; }; 706EF00F26D15985001C950E /* KramMipper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3726D1583F001C950E /* KramMipper.h */; }; 706EF01026D15985001C950E /* TaskSystem.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3826D1583F001C950E /* TaskSystem.h */; }; @@ -116,224 +110,107 @@ 706EF01B26D15985001C950E /* lodepng.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE5426D1583F001C950E /* lodepng.h */; }; 706EF01C26D15985001C950E /* tmpfileplus.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE5926D1583F001C950E /* tmpfileplus.h */; }; 706EF12B26D159F9001C950E /* libate.tbd in Frameworks */ = {isa = 
PBXBuildFile; fileRef = 706EF12A26D159F9001C950E /* libate.tbd */; }; - 706EF14B26D166C5001C950E /* EtcErrorMetric.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDAB26D1583E001C950E /* EtcErrorMetric.h */; }; - 706EF14C26D166C5001C950E /* EtcColor.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDAD26D1583E001C950E /* EtcColor.h */; }; - 706EF14D26D166C5001C950E /* EtcDifferentialTrys.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDAE26D1583E001C950E /* EtcDifferentialTrys.h */; }; - 706EF14E26D166C5001C950E /* EtcBlock4x4Encoding_RGB8.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDB026D1583E001C950E /* EtcBlock4x4Encoding_RGB8.h */; }; - 706EF14F26D166C5001C950E /* EtcConfig.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDB426D1583E001C950E /* EtcConfig.h */; }; - 706EF15026D166C5001C950E /* EtcBlock4x4Encoding_R11.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDB526D1583E001C950E /* EtcBlock4x4Encoding_R11.h */; }; - 706EF15126D166C5001C950E /* EtcBlock4x4Encoding_RG11.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDB726D1583E001C950E /* EtcBlock4x4Encoding_RG11.h */; }; - 706EF15226D166C5001C950E /* EtcMath.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDB926D1583E001C950E /* EtcMath.h */; }; - 706EF15326D166C5001C950E /* EtcIndividualTrys.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDBA26D1583E001C950E /* EtcIndividualTrys.h */; }; - 706EF15426D166C5001C950E /* EtcBlock4x4EncodingBits.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDBD26D1583E001C950E /* EtcBlock4x4EncodingBits.h */; }; - 706EF15526D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDBE26D1583E001C950E /* EtcBlock4x4Encoding_RGB8A1.h */; }; - 706EF15626D166C5001C950E /* EtcBlock4x4.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC026D1583E001C950E /* EtcBlock4x4.h */; }; - 706EF15726D166C5001C950E /* Etc.h in Headers */ = {isa = PBXBuildFile; fileRef = 
706EEDC126D1583E001C950E /* Etc.h */; }; - 706EF15826D166C5001C950E /* EtcImage.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC226D1583E001C950E /* EtcImage.h */; }; - 706EF15926D166C5001C950E /* EtcBlock4x4Encoding_ETC1.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC326D1583E001C950E /* EtcBlock4x4Encoding_ETC1.h */; }; - 706EF15A26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC426D1583E001C950E /* EtcBlock4x4Encoding_RGBA8.h */; }; - 706EF15B26D166C5001C950E /* EtcColorFloatRGBA.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */; }; - 706EF15C26D166C5001C950E /* EtcBlock4x4Encoding.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDC826D1583E001C950E /* EtcBlock4x4Encoding.h */; }; - 706EF15D26D166C5001C950E /* rgbcx.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCB26D1583E001C950E /* rgbcx.h */; }; - 706EF15E26D166C5001C950E /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCC26D1583E001C950E /* bc7enc.h */; }; - 706EF15F26D166C5001C950E /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDCD26D1583E001C950E /* bc7decomp.h */; }; - 706EF16026D166C5001C950E /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDD126D1583E001C950E /* rgbcx_table4.h */; }; - 706EF16C26D166C5001C950E /* ateencoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFA26D1583E001C950E /* ateencoder.h */; }; - 706EF16D26D166C5001C950E /* basisu_transcoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFC26D1583E001C950E /* basisu_transcoder.h */; }; - 706EF16E26D166C5001C950E /* basisu_containers.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFD26D1583E001C950E /* basisu_containers.h */; }; - 706EF16F26D166C5001C950E /* basisu_containers_impl.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEDFF26D1583E001C950E /* basisu_containers_impl.h */; }; - 706EF17026D166C5001C950E /* 
basisu_transcoder_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0226D1583F001C950E /* basisu_transcoder_internal.h */; }; - 706EF17126D166C5001C950E /* basisu_global_selector_cb.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0326D1583F001C950E /* basisu_global_selector_cb.h */; }; - 706EF17226D166C5001C950E /* basisu_transcoder_uastc.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0526D1583F001C950E /* basisu_transcoder_uastc.h */; }; - 706EF17326D166C5001C950E /* basisu_global_selector_palette.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0626D1583F001C950E /* basisu_global_selector_palette.h */; }; - 706EF17426D166C5001C950E /* basisu.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0C26D1583F001C950E /* basisu.h */; }; - 706EF17526D166C5001C950E /* basisu_file_headers.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE0E26D1583F001C950E /* basisu_file_headers.h */; }; - 706EF17626D166C5001C950E /* miniz.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1226D1583F001C950E /* miniz.h */; }; - 706EF17726D166C5001C950E /* hedistance.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1526D1583F001C950E /* hedistance.h */; }; - 706EF17826D166C5001C950E /* stb_rect_pack.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1726D1583F001C950E /* stb_rect_pack.h */; }; - 706EF17926D166C5001C950E /* KramZipHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE1926D1583F001C950E /* KramZipHelper.h */; }; - 706EF17A26D166C5001C950E /* KramSDFMipper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2026D1583F001C950E /* KramSDFMipper.h */; }; - 706EF17B26D166C5001C950E /* sse2neon.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2226D1583F001C950E /* sse2neon.h */; }; - 706EF17C26D166C5001C950E /* KramConfig.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2326D1583F001C950E /* KramConfig.h */; }; - 706EF17D26D166C5001C950E /* KramLog.h in Headers */ = {isa = PBXBuildFile; fileRef = 
706EEE2426D1583F001C950E /* KramLog.h */; }; - 706EF17E26D166C5001C950E /* KramLib.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2726D1583F001C950E /* KramLib.h */; }; - 706EF17F26D166C5001C950E /* KramVersion.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2926D1583F001C950E /* KramVersion.h */; }; - 706EF18026D166C5001C950E /* KramImage.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2A26D1583F001C950E /* KramImage.h */; }; - 706EF18126D166C5001C950E /* win_mmap.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2D26D1583F001C950E /* win_mmap.h */; }; - 706EF18226D166C5001C950E /* Kram.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE2E26D1583F001C950E /* Kram.h */; }; - 706EF18326D166C5001C950E /* KTXImage.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3026D1583F001C950E /* KTXImage.h */; }; - 706EF18426D166C5001C950E /* KramImageInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3126D1583F001C950E /* KramImageInfo.h */; }; - 706EF18526D166C5001C950E /* KramTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3226D1583F001C950E /* KramTimer.h */; }; - 706EF18626D166C5001C950E /* KramMmapHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3326D1583F001C950E /* KramMmapHelper.h */; }; - 706EF18726D166C5001C950E /* float4a.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3426D1583F001C950E /* float4a.h */; }; - 706EF18826D166C5001C950E /* KramFileHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3626D1583F001C950E /* KramFileHelper.h */; }; - 706EF18926D166C5001C950E /* KramMipper.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3726D1583F001C950E /* KramMipper.h */; }; - 706EF18A26D166C5001C950E /* TaskSystem.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3826D1583F001C950E /* TaskSystem.h */; }; - 706EF18B26D166C5001C950E /* squish.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3A26D1583F001C950E /* squish.h */; }; - 706EF18C26D166C5001C950E /* clusterfit.h 
in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3B26D1583F001C950E /* clusterfit.h */; }; - 706EF18D26D166C5001C950E /* colourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3C26D1583F001C950E /* colourfit.h */; }; - 706EF18E26D166C5001C950E /* alpha.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE3F26D1583F001C950E /* alpha.h */; }; - 706EF18F26D166C5001C950E /* singlecolourfit.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE4126D1583F001C950E /* singlecolourfit.h */; }; - 706EF19026D166C5001C950E /* maths.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE4526D1583F001C950E /* maths.h */; }; - 706EF19126D166C5001C950E /* colourset.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE4826D1583F001C950E /* colourset.h */; }; - 706EF19226D166C5001C950E /* colourblock.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE4A26D1583F001C950E /* colourblock.h */; }; - 706EF19326D166C5001C950E /* rangefit.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE4B26D1583F001C950E /* rangefit.h */; }; - 706EF19426D166C5001C950E /* zstd.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE5226D1583F001C950E /* zstd.h */; }; - 706EF19526D166C5001C950E /* lodepng.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE5426D1583F001C950E /* lodepng.h */; }; - 706EF19626D166C5001C950E /* tmpfileplus.h in Headers */ = {isa = PBXBuildFile; fileRef = 706EEE5926D1583F001C950E /* tmpfileplus.h */; }; - 706EF19826D166C5001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAA26D1583E001C950E /* EtcBlock4x4Encoding_RGB8.cpp */; }; - 706EF19926D166C5001C950E /* EtcImage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAC26D1583E001C950E /* EtcImage.cpp */; }; - 706EF19A26D166C5001C950E /* EtcDifferentialTrys.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDAF26D1583E001C950E /* EtcDifferentialTrys.cpp */; }; - 706EF19B26D166C5001C950E /* EtcMath.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
706EEDB126D1583E001C950E /* EtcMath.cpp */; }; - 706EF19C26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDB226D1583E001C950E /* EtcBlock4x4Encoding_RGBA8.cpp */; }; - 706EF19D26D166C5001C950E /* EtcBlock4x4Encoding_RG11.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDB326D1583E001C950E /* EtcBlock4x4Encoding_RG11.cpp */; }; - 706EF19E26D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDB626D1583E001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp */; }; - 706EF19F26D166C5001C950E /* EtcIndividualTrys.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBB26D1583E001C950E /* EtcIndividualTrys.cpp */; }; - 706EF1A026D166C5001C950E /* EtcBlock4x4Encoding_R11.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBC26D1583E001C950E /* EtcBlock4x4Encoding_R11.cpp */; }; - 706EF1A126D166C5001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDBF26D1583E001C950E /* EtcBlock4x4Encoding_ETC1.cpp */; }; - 706EF1A226D166C5001C950E /* EtcBlock4x4Encoding.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC526D1583E001C950E /* EtcBlock4x4Encoding.cpp */; }; - 706EF1A326D166C5001C950E /* EtcBlock4x4.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */; }; - 706EF1A426D166C5001C950E /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDCE26D1583E001C950E /* bc7decomp.cpp */; }; - 706EF1A526D166C5001C950E /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDD026D1583E001C950E /* bc7enc.cpp */; }; - 706EF1BF26D166C5001C950E /* basisu_transcoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE0426D1583F001C950E /* basisu_transcoder.cpp */; }; - 706EF1C026D166C5001C950E /* miniz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1126D1583F001C950E /* miniz.cpp */; }; - 706EF1C126D166C5001C950E /* hedistance.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 706EEE1426D1583F001C950E /* hedistance.cpp */; }; - 706EF1C226D166C5001C950E /* KramTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1A26D1583F001C950E /* KramTimer.cpp */; }; - 706EF1C326D166C5001C950E /* KTXImage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1B26D1583F001C950E /* KTXImage.cpp */; }; - 706EF1C426D166C5001C950E /* KramMipper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1C26D1583F001C950E /* KramMipper.cpp */; }; - 706EF1C526D166C5001C950E /* KramZipHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1E26D1583F001C950E /* KramZipHelper.cpp */; }; - 706EF1C626D166C5001C950E /* TaskSystem.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE1F26D1583F001C950E /* TaskSystem.cpp */; }; - 706EF1C726D166C5001C950E /* KramFileHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2126D1583F001C950E /* KramFileHelper.cpp */; }; - 706EF1C826D166C5001C950E /* KramImageInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2526D1583F001C950E /* KramImageInfo.cpp */; }; - 706EF1C926D166C5001C950E /* KramImage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2626D1583F001C950E /* KramImage.cpp */; }; - 706EF1CA26D166C5001C950E /* KramLog.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2826D1583F001C950E /* KramLog.cpp */; }; - 706EF1CB26D166C5001C950E /* KramSDFMipper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2B26D1583F001C950E /* KramSDFMipper.cpp */; }; - 706EF1CC26D166C5001C950E /* KramMmapHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2C26D1583F001C950E /* KramMmapHelper.cpp */; }; - 706EF1CD26D166C5001C950E /* float4a.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE2F26D1583F001C950E /* float4a.cpp */; }; - 706EF1CE26D166C5001C950E /* Kram.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE3526D1583F001C950E /* Kram.cpp */; }; - 706EF1CF26D166C5001C950E /* squish.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = 706EEE3D26D1583F001C950E /* squish.cpp */; }; - 706EF1D026D166C5001C950E /* colourset.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE3E26D1583F001C950E /* colourset.cpp */; }; - 706EF1D126D166C5001C950E /* clusterfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4226D1583F001C950E /* clusterfit.cpp */; }; - 706EF1D226D166C5001C950E /* rangefit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4426D1583F001C950E /* rangefit.cpp */; }; - 706EF1D326D166C5001C950E /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4626D1583F001C950E /* alpha.cpp */; }; - 706EF1D426D166C5001C950E /* colourblock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4726D1583F001C950E /* colourblock.cpp */; }; - 706EF1D526D166C5001C950E /* colourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4926D1583F001C950E /* colourfit.cpp */; }; - 706EF1D626D166C5001C950E /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4D26D1583F001C950E /* maths.cpp */; }; - 706EF1D726D166C5001C950E /* singlecolourfit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE4E26D1583F001C950E /* singlecolourfit.cpp */; }; - 706EF1D826D166C5001C950E /* zstd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE5026D1583F001C950E /* zstd.cpp */; }; - 706EF1D926D166C5001C950E /* zstddeclib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE5126D1583F001C950E /* zstddeclib.cpp */; }; - 706EF1DA26D166C5001C950E /* lodepng.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE5626D1583F001C950E /* lodepng.cpp */; }; - 706EF1DB26D166C5001C950E /* tmpfileplus.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EEE5826D1583F001C950E /* tmpfileplus.cpp */; }; - 706EF1DD26D166C5001C950E /* libate.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF12A26D159F9001C950E /* libate.tbd */; }; 706EF26426D17DCC001C950E /* ateencoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDF926D1583E001C950E /* ateencoder.mm */; 
}; - 706EFC2426D1C39B001C950E /* ateencoder.mm in Sources */ = {isa = PBXBuildFile; fileRef = 706EEDF926D1583E001C950E /* ateencoder.mm */; }; 706EFF7326D34740001C950E /* thread_support.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5726D3473F001C950E /* thread_support.cpp */; }; - 706EFF7426D34740001C950E /* thread_support.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5726D3473F001C950E /* thread_support.cpp */; }; 706EFF7526D34740001C950E /* assert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5826D3473F001C950E /* assert.cpp */; }; - 706EFF7626D34740001C950E /* assert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5826D3473F001C950E /* assert.cpp */; }; 706EFF7726D34740001C950E /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5926D3473F001C950E /* string.cpp */; }; - 706EFF7826D34740001C950E /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5926D3473F001C950E /* string.cpp */; }; - 706EFF7926D34740001C950E /* allocator_eastl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5A26D3473F001C950E /* allocator_eastl.cpp */; }; - 706EFF7A26D34740001C950E /* allocator_eastl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5A26D3473F001C950E /* allocator_eastl.cpp */; }; 706EFF7B26D34740001C950E /* numeric_limits.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5B26D3473F001C950E /* numeric_limits.cpp */; }; - 706EFF7C26D34740001C950E /* numeric_limits.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5B26D3473F001C950E /* numeric_limits.cpp */; }; 706EFF7F26D34740001C950E /* intrusive_list.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5D26D3473F001C950E /* intrusive_list.cpp */; }; - 706EFF8026D34740001C950E /* intrusive_list.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5D26D3473F001C950E /* intrusive_list.cpp */; }; 706EFF8126D34740001C950E /* hashtable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5E26D3473F001C950E /* 
hashtable.cpp */; }; - 706EFF8226D34740001C950E /* hashtable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5E26D3473F001C950E /* hashtable.cpp */; }; 706EFF8326D34740001C950E /* red_black_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5F26D3473F001C950E /* red_black_tree.cpp */; }; - 706EFF8426D34740001C950E /* red_black_tree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD5F26D3473F001C950E /* red_black_tree.cpp */; }; 706EFF8526D34740001C950E /* fixed_pool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD6026D3473F001C950E /* fixed_pool.cpp */; }; - 706EFF8626D34740001C950E /* fixed_pool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EFD6026D3473F001C950E /* fixed_pool.cpp */; }; + 707789D52881BA81008A51BC /* bc7enc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789C62881BA81008A51BC /* bc7enc.cpp */; }; + 707789D72881BA81008A51BC /* bc7enc.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C72881BA81008A51BC /* bc7enc.h */; }; + 707789D92881BA81008A51BC /* bc7decomp.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C82881BA81008A51BC /* bc7decomp.h */; }; + 707789DB2881BA81008A51BC /* ert.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789C92881BA81008A51BC /* ert.h */; }; + 707789DD2881BA81008A51BC /* rgbcx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CA2881BA81008A51BC /* rgbcx.cpp */; }; + 707789DF2881BA81008A51BC /* rgbcx_table4.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CB2881BA81008A51BC /* rgbcx_table4.h */; }; + 707789E12881BA81008A51BC /* utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CC2881BA81008A51BC /* utils.cpp */; }; + 707789E32881BA81008A51BC /* rgbcx_table4_small.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */; }; + 707789E52881BA81008A51BC /* ert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789CE2881BA81008A51BC /* ert.cpp */; }; + 707789E72881BA81008A51BC /* rgbcx.h in 
Headers */ = {isa = PBXBuildFile; fileRef = 707789CF2881BA81008A51BC /* rgbcx.h */; }; + 707789E92881BA81008A51BC /* bc7decomp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D02881BA81008A51BC /* bc7decomp.cpp */; }; + 707789EB2881BA81008A51BC /* utils.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789D22881BA81008A51BC /* utils.h */; }; + 707789ED2881BA81008A51BC /* bc7decomp_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */; }; + 707789F12881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */; }; + 707789F32881BCE2008A51BC /* rdo_bc_encoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */; }; + 707B2AB42D99BF7A00DD3F0B /* KramThreadPool.h in Headers */ = {isa = PBXBuildFile; fileRef = 707B2AB22D99BF7A00DD3F0B /* KramThreadPool.h */; }; + 707B2AB52D99BF7A00DD3F0B /* KramThreadPool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 707B2AB32D99BF7A00DD3F0B /* KramThreadPool.cpp */; }; 70871DC927DDDBCD00D0B9E1 /* astcenc_vecmathlib_common_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */; }; - 70871DCA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_common_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */; }; 70871DCB27DDDBCD00D0B9E1 /* astcenc_image.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DA827DDDBCC00D0B9E1 /* astcenc_image.cpp */; }; - 70871DCC27DDDBCD00D0B9E1 /* astcenc_image.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DA827DDDBCC00D0B9E1 /* astcenc_image.cpp */; }; 70871DCD27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DA927DDDBCC00D0B9E1 /* astcenc_find_best_partitioning.cpp */; }; - 70871DCE27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 70871DA927DDDBCC00D0B9E1 /* astcenc_find_best_partitioning.cpp */; }; 70871DCF27DDDBCD00D0B9E1 /* astcenc_symbolic_physical.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAA27DDDBCC00D0B9E1 /* astcenc_symbolic_physical.cpp */; }; - 70871DD027DDDBCD00D0B9E1 /* astcenc_symbolic_physical.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAA27DDDBCC00D0B9E1 /* astcenc_symbolic_physical.cpp */; }; 70871DD127DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAB27DDDBCC00D0B9E1 /* astcenc_averages_and_directions.cpp */; }; - 70871DD227DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAB27DDDBCC00D0B9E1 /* astcenc_averages_and_directions.cpp */; }; 70871DD327DDDBCD00D0B9E1 /* astcenc_partition_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAC27DDDBCC00D0B9E1 /* astcenc_partition_tables.cpp */; }; - 70871DD427DDDBCD00D0B9E1 /* astcenc_partition_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAC27DDDBCC00D0B9E1 /* astcenc_partition_tables.cpp */; }; 70871DD527DDDBCD00D0B9E1 /* astcenc.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DAD27DDDBCC00D0B9E1 /* astcenc.h */; }; - 70871DD627DDDBCD00D0B9E1 /* astcenc.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DAD27DDDBCC00D0B9E1 /* astcenc.h */; }; 70871DD727DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAE27DDDBCC00D0B9E1 /* astcenc_quantization.cpp */; }; - 70871DD827DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAE27DDDBCC00D0B9E1 /* astcenc_quantization.cpp */; }; 70871DD927DDDBCD00D0B9E1 /* astcenc_compute_variance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DAF27DDDBCC00D0B9E1 /* astcenc_compute_variance.cpp */; }; - 70871DDA27DDDBCD00D0B9E1 /* astcenc_compute_variance.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
70871DAF27DDDBCC00D0B9E1 /* astcenc_compute_variance.cpp */; }; 70871DDB27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB027DDDBCC00D0B9E1 /* astcenc_percentile_tables.cpp */; }; - 70871DDC27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB027DDDBCC00D0B9E1 /* astcenc_percentile_tables.cpp */; }; 70871DDD27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DB127DDDBCC00D0B9E1 /* astcenc_vecmathlib_sse_4.h */; }; - 70871DDE27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DB127DDDBCC00D0B9E1 /* astcenc_vecmathlib_sse_4.h */; }; 70871DDF27DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB227DDDBCC00D0B9E1 /* astcenc_mathlib_softfloat.cpp */; }; - 70871DE027DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB227DDDBCC00D0B9E1 /* astcenc_mathlib_softfloat.cpp */; }; - 70871DE127DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB327DDDBCC00D0B9E1 /* astcenc_mathlib.cpp */; }; - 70871DE227DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB327DDDBCC00D0B9E1 /* astcenc_mathlib.cpp */; }; 70871DE327DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB427DDDBCC00D0B9E1 /* astcenc_decompress_symbolic.cpp */; }; - 70871DE427DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB427DDDBCC00D0B9E1 /* astcenc_decompress_symbolic.cpp */; }; 70871DE527DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB527DDDBCC00D0B9E1 /* astcenc_compress_symbolic.cpp */; }; - 70871DE627DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = 70871DB527DDDBCC00D0B9E1 /* astcenc_compress_symbolic.cpp */; }; 70871DE727DDDBCD00D0B9E1 /* astcenc_entry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB627DDDBCC00D0B9E1 /* astcenc_entry.cpp */; }; - 70871DE827DDDBCD00D0B9E1 /* astcenc_entry.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB627DDDBCC00D0B9E1 /* astcenc_entry.cpp */; }; 70871DE927DDDBCD00D0B9E1 /* astcenc_integer_sequence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB727DDDBCC00D0B9E1 /* astcenc_integer_sequence.cpp */; }; - 70871DEA27DDDBCD00D0B9E1 /* astcenc_integer_sequence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB727DDDBCC00D0B9E1 /* astcenc_integer_sequence.cpp */; }; 70871DEB27DDDBCD00D0B9E1 /* astcenc_block_sizes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB827DDDBCC00D0B9E1 /* astcenc_block_sizes.cpp */; }; - 70871DEC27DDDBCD00D0B9E1 /* astcenc_block_sizes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DB827DDDBCC00D0B9E1 /* astcenc_block_sizes.cpp */; }; 70871DED27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DB927DDDBCC00D0B9E1 /* astcenc_diagnostic_trace.h */; }; - 70871DEE27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DB927DDDBCC00D0B9E1 /* astcenc_diagnostic_trace.h */; }; 70871DEF27DDDBCD00D0B9E1 /* astcenc_weight_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DBA27DDDBCC00D0B9E1 /* astcenc_weight_align.cpp */; }; - 70871DF027DDDBCD00D0B9E1 /* astcenc_weight_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DBA27DDDBCC00D0B9E1 /* astcenc_weight_align.cpp */; }; 70871DF127DDDBCD00D0B9E1 /* astcenc_mathlib.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBB27DDDBCC00D0B9E1 /* astcenc_mathlib.h */; }; - 70871DF227DDDBCD00D0B9E1 /* astcenc_mathlib.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBB27DDDBCC00D0B9E1 /* astcenc_mathlib.h */; }; 70871DF327DDDBCD00D0B9E1 /* 
astcenc_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBC27DDDBCC00D0B9E1 /* astcenc_internal.h */; }; - 70871DF427DDDBCD00D0B9E1 /* astcenc_internal.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBC27DDDBCC00D0B9E1 /* astcenc_internal.h */; }; 70871DF527DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DBD27DDDBCC00D0B9E1 /* astcenc_color_quantize.cpp */; }; - 70871DF627DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DBD27DDDBCC00D0B9E1 /* astcenc_color_quantize.cpp */; }; 70871DF727DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBE27DDDBCC00D0B9E1 /* astcenc_vecmathlib_neon_4.h */; }; - 70871DF827DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBE27DDDBCC00D0B9E1 /* astcenc_vecmathlib_neon_4.h */; }; 70871DF927DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBF27DDDBCC00D0B9E1 /* astcenc_vecmathlib_avx2_8.h */; }; - 70871DFA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DBF27DDDBCC00D0B9E1 /* astcenc_vecmathlib_avx2_8.h */; }; 70871DFB27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DC027DDDBCC00D0B9E1 /* astcenc_vecmathlib_none_4.h */; }; - 70871DFC27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DC027DDDBCC00D0B9E1 /* astcenc_vecmathlib_none_4.h */; }; 70871DFD27DDDBCD00D0B9E1 /* astcenc_vecmathlib.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DC127DDDBCC00D0B9E1 /* astcenc_vecmathlib.h */; }; - 70871DFE27DDDBCD00D0B9E1 /* astcenc_vecmathlib.h in Headers */ = {isa = PBXBuildFile; fileRef = 70871DC127DDDBCC00D0B9E1 /* astcenc_vecmathlib.h */; }; 70871DFF27DDDBCD00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 70871DC227DDDBCC00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp */; }; - 70871E0027DDDBCD00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC227DDDBCC00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp */; }; 70871E0127DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC327DDDBCC00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp */; }; - 70871E0227DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC327DDDBCC00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp */; }; 70871E0327DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC427DDDBCC00D0B9E1 /* astcenc_color_unquantize.cpp */; }; - 70871E0427DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC427DDDBCC00D0B9E1 /* astcenc_color_unquantize.cpp */; }; 70871E0527DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC527DDDBCC00D0B9E1 /* astcenc_platform_isa_detection.cpp */; }; - 70871E0627DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC527DDDBCC00D0B9E1 /* astcenc_platform_isa_detection.cpp */; }; 70871E0727DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC627DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp */; }; - 70871E0827DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC627DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp */; }; 70871E0927DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70871DC727DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp */; }; - 70871E0A27DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
70871DC727DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp */; }; 708A6A962708CE4700BA5410 /* bc6h_decode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 708A6A8B2708CE4700BA5410 /* bc6h_decode.cpp */; }; - 708A6A972708CE4700BA5410 /* bc6h_decode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 708A6A8B2708CE4700BA5410 /* bc6h_decode.cpp */; }; 708A6A982708CE4700BA5410 /* bc6h_decode.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A8C2708CE4700BA5410 /* bc6h_decode.h */; }; - 708A6A992708CE4700BA5410 /* bc6h_decode.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A8C2708CE4700BA5410 /* bc6h_decode.h */; }; 708A6A9A2708CE4700BA5410 /* bc6h_encode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 708A6A8D2708CE4700BA5410 /* bc6h_encode.cpp */; }; - 708A6A9B2708CE4700BA5410 /* bc6h_encode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 708A6A8D2708CE4700BA5410 /* bc6h_encode.cpp */; }; 708A6A9C2708CE4700BA5410 /* bc6h_encode.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A8E2708CE4700BA5410 /* bc6h_encode.h */; }; - 708A6A9D2708CE4700BA5410 /* bc6h_encode.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A8E2708CE4700BA5410 /* bc6h_encode.h */; }; 708A6AA02708CE4700BA5410 /* bc6h_definitions.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A902708CE4700BA5410 /* bc6h_definitions.h */; }; - 708A6AA12708CE4700BA5410 /* bc6h_definitions.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A902708CE4700BA5410 /* bc6h_definitions.h */; }; 708A6AA42708CE4700BA5410 /* bc6h_utils.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A922708CE4700BA5410 /* bc6h_utils.h */; }; - 708A6AA52708CE4700BA5410 /* bc6h_utils.h in Headers */ = {isa = PBXBuildFile; fileRef = 708A6A922708CE4700BA5410 /* bc6h_utils.h */; }; + 709B8D2D28D7BCAD0081BD1F /* ostream.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D1C28D7BCAD0081BD1F /* ostream.h */; }; + 709B8D2F28D7BCAD0081BD1F /* format-inl.h in Headers */ = {isa = PBXBuildFile; fileRef = 
709B8D1D28D7BCAD0081BD1F /* format-inl.h */; }; + 709B8D3128D7BCAD0081BD1F /* ranges.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D1E28D7BCAD0081BD1F /* ranges.h */; }; + 709B8D3328D7BCAD0081BD1F /* xchar.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D1F28D7BCAD0081BD1F /* xchar.h */; }; + 709B8D3528D7BCAD0081BD1F /* core.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2028D7BCAD0081BD1F /* core.h */; }; + 709B8D3728D7BCAD0081BD1F /* os.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 709B8D2128D7BCAD0081BD1F /* os.cpp */; }; + 709B8D3928D7BCAD0081BD1F /* format.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 709B8D2228D7BCAD0081BD1F /* format.cpp */; }; + 709B8D3D28D7BCAD0081BD1F /* chrono.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2428D7BCAD0081BD1F /* chrono.h */; }; + 709B8D3F28D7BCAD0081BD1F /* os.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2528D7BCAD0081BD1F /* os.h */; }; + 709B8D4128D7BCAD0081BD1F /* color.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2628D7BCAD0081BD1F /* color.h */; }; + 709B8D4328D7BCAD0081BD1F /* args.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2728D7BCAD0081BD1F /* args.h */; }; + 709B8D4528D7BCAD0081BD1F /* printf.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2828D7BCAD0081BD1F /* printf.h */; }; + 709B8D4728D7BCAD0081BD1F /* compile.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2928D7BCAD0081BD1F /* compile.h */; }; + 709B8D4928D7BCAD0081BD1F /* format.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2B28D7BCAD0081BD1F /* format.h */; }; + 709B8D4B28D7BCAD0081BD1F /* std.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D2C28D7BCAD0081BD1F /* std.h */; }; + 709B8D4F28D7C15F0081BD1F /* KramFmt.h in Headers */ = {isa = PBXBuildFile; fileRef = 709B8D4D28D7C15F0081BD1F /* KramFmt.h */; }; 70A7BD3027092A1200DBCCF7 /* hdr_encode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70A7BD2E27092A1200DBCCF7 /* hdr_encode.cpp */; }; - 
70A7BD3127092A1200DBCCF7 /* hdr_encode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70A7BD2E27092A1200DBCCF7 /* hdr_encode.cpp */; }; 70A7BD3227092A1200DBCCF7 /* hdr_encode.h in Headers */ = {isa = PBXBuildFile; fileRef = 70A7BD2F27092A1200DBCCF7 /* hdr_encode.h */; }; - 70A7BD3327092A1200DBCCF7 /* hdr_encode.h in Headers */ = {isa = PBXBuildFile; fileRef = 70A7BD2F27092A1200DBCCF7 /* hdr_encode.h */; }; + 70B563A72C857B360089A64F /* KramZipStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70B563A52C857B360089A64F /* KramZipStream.cpp */; }; + 70B563A92C857B360089A64F /* KramZipStream.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B563A62C857B360089A64F /* KramZipStream.h */; }; 70CDB65027A1382700A546C1 /* KramDDSHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 70CDB64E27A1382600A546C1 /* KramDDSHelper.h */; }; - 70CDB65127A1382700A546C1 /* KramDDSHelper.h in Headers */ = {isa = PBXBuildFile; fileRef = 70CDB64E27A1382600A546C1 /* KramDDSHelper.h */; }; 70CDB65227A1382700A546C1 /* KramDDSHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70CDB64F27A1382600A546C1 /* KramDDSHelper.cpp */; }; - 70CDB65327A1382700A546C1 /* KramDDSHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70CDB64F27A1382600A546C1 /* KramDDSHelper.cpp */; }; + 70D222D82AC800AC00B9EA23 /* json11.h in Headers */ = {isa = PBXBuildFile; fileRef = 70D222D62AC800AC00B9EA23 /* json11.h */; }; + 70D222DA2AC800AC00B9EA23 /* json11.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70D222D72AC800AC00B9EA23 /* json11.cpp */; }; + 70D222DE2AD2132300B9EA23 /* ImmutableString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70D222DC2AD2132300B9EA23 /* ImmutableString.cpp */; }; + 70D222E02AD2132300B9EA23 /* ImmutableString.h in Headers */ = {isa = PBXBuildFile; fileRef = 70D222DD2AD2132300B9EA23 /* ImmutableString.h */; }; + 70D222E42AD22BED00B9EA23 /* BlockedLinearAllocator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70D222E22AD22BED00B9EA23 /* 
BlockedLinearAllocator.cpp */; }; + 70D222E62AD22BED00B9EA23 /* BlockedLinearAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 70D222E32AD22BED00B9EA23 /* BlockedLinearAllocator.h */; }; + 70D222EB2ADAF25E00B9EA23 /* simdjson.h in Headers */ = {isa = PBXBuildFile; fileRef = 70D222E92ADAF25E00B9EA23 /* simdjson.h */; }; + 70D222ED2ADAF25E00B9EA23 /* simdjson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70D222EA2ADAF25E00B9EA23 /* simdjson.cpp */; }; + 70D222F52ADAF78300B9EA23 /* dlmalloc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70D222F42ADAF78300B9EA23 /* dlmalloc.cpp */; }; + 70D222F82ADAFA1500B9EA23 /* dlmalloc.h in Headers */ = {isa = PBXBuildFile; fileRef = 70D222F72ADAFA1500B9EA23 /* dlmalloc.h */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 706178172DE16211001545E1 /* KramFileIO.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramFileIO.h; sourceTree = ""; }; + 706178182DE16211001545E1 /* KramFileIO.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramFileIO.cpp; sourceTree = ""; }; 706ECDDE26D1577A001C950E /* libkram.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libkram.a; sourceTree = BUILT_PRODUCTS_DIR; }; 706EEDAA26D1583E001C950E /* EtcBlock4x4Encoding_RGB8.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = EtcBlock4x4Encoding_RGB8.cpp; sourceTree = ""; }; 706EEDAB26D1583E001C950E /* EtcErrorMetric.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = EtcErrorMetric.h; sourceTree = ""; }; @@ -365,14 +242,6 @@ 706EEDC626D1583E001C950E /* EtcBlock4x4.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = EtcBlock4x4.cpp; sourceTree = ""; }; 706EEDC726D1583E001C950E /* EtcColorFloatRGBA.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = EtcColorFloatRGBA.h; sourceTree = ""; }; 706EEDC826D1583E001C950E /* 
EtcBlock4x4Encoding.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = EtcBlock4x4Encoding.h; sourceTree = ""; }; - 706EEDCA26D1583E001C950E /* LICENSE */ = {isa = PBXFileReference; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; - 706EEDCB26D1583E001C950E /* rgbcx.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rgbcx.h; sourceTree = ""; }; - 706EEDCC26D1583E001C950E /* bc7enc.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bc7enc.h; sourceTree = ""; }; - 706EEDCD26D1583E001C950E /* bc7decomp.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bc7decomp.h; sourceTree = ""; }; - 706EEDCE26D1583E001C950E /* bc7decomp.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp.cpp; sourceTree = ""; }; - 706EEDCF26D1583E001C950E /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; - 706EEDD026D1583E001C950E /* bc7enc.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bc7enc.cpp; sourceTree = ""; }; - 706EEDD126D1583E001C950E /* rgbcx_table4.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4.h; sourceTree = ""; }; 706EEDF926D1583E001C950E /* ateencoder.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ateencoder.mm; sourceTree = ""; }; 706EEDFA26D1583E001C950E /* ateencoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ateencoder.h; sourceTree = ""; }; 706EEDFC26D1583E001C950E /* basisu_transcoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = basisu_transcoder.h; sourceTree = ""; }; @@ -404,12 +273,10 @@ 706EEE1A26D1583F001C950E /* KramTimer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramTimer.cpp; sourceTree = ""; }; 706EEE1B26D1583F001C950E /* KTXImage.cpp 
*/ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KTXImage.cpp; sourceTree = ""; }; 706EEE1C26D1583F001C950E /* KramMipper.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramMipper.cpp; sourceTree = ""; }; - 706EEE1D26D1583F001C950E /* _clang-format */ = {isa = PBXFileReference; lastKnownFileType = text; path = "_clang-format"; sourceTree = ""; }; 706EEE1E26D1583F001C950E /* KramZipHelper.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramZipHelper.cpp; sourceTree = ""; }; 706EEE1F26D1583F001C950E /* TaskSystem.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = TaskSystem.cpp; sourceTree = ""; }; 706EEE2026D1583F001C950E /* KramSDFMipper.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramSDFMipper.h; sourceTree = ""; }; 706EEE2126D1583F001C950E /* KramFileHelper.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramFileHelper.cpp; sourceTree = ""; }; - 706EEE2226D1583F001C950E /* sse2neon.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sse2neon.h; sourceTree = ""; }; 706EEE2326D1583F001C950E /* KramConfig.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramConfig.h; sourceTree = ""; }; 706EEE2426D1583F001C950E /* KramLog.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramLog.h; sourceTree = ""; }; 706EEE2526D1583F001C950E /* KramImageInfo.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramImageInfo.cpp; sourceTree = ""; }; @@ -422,12 +289,10 @@ 706EEE2C26D1583F001C950E /* KramMmapHelper.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramMmapHelper.cpp; sourceTree = ""; }; 706EEE2D26D1583F001C950E /* win_mmap.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = win_mmap.h; sourceTree = ""; }; 706EEE2E26D1583F001C950E 
/* Kram.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Kram.h; sourceTree = ""; }; - 706EEE2F26D1583F001C950E /* float4a.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = float4a.cpp; sourceTree = ""; }; 706EEE3026D1583F001C950E /* KTXImage.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KTXImage.h; sourceTree = ""; }; 706EEE3126D1583F001C950E /* KramImageInfo.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramImageInfo.h; sourceTree = ""; }; 706EEE3226D1583F001C950E /* KramTimer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramTimer.h; sourceTree = ""; }; 706EEE3326D1583F001C950E /* KramMmapHelper.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramMmapHelper.h; sourceTree = ""; }; - 706EEE3426D1583F001C950E /* float4a.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = float4a.h; sourceTree = ""; }; 706EEE3526D1583F001C950E /* Kram.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Kram.cpp; sourceTree = ""; }; 706EEE3626D1583F001C950E /* KramFileHelper.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramFileHelper.h; sourceTree = ""; }; 706EEE3726D1583F001C950E /* KramMipper.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramMipper.h; sourceTree = ""; }; @@ -460,7 +325,6 @@ 706EEE5826D1583F001C950E /* tmpfileplus.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = tmpfileplus.cpp; sourceTree = ""; }; 706EEE5926D1583F001C950E /* tmpfileplus.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tmpfileplus.h; sourceTree = ""; }; 706EF12A26D159F9001C950E /* libate.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libate.tbd; path = usr/lib/libate.tbd; sourceTree = SDKROOT; }; - 
706EF1E126D166C5001C950E /* libkram-ios.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = "libkram-ios.a"; sourceTree = BUILT_PRODUCTS_DIR; }; 706EFC4126D3473F001C950E /* eaunits.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eaunits.h; sourceTree = ""; }; 706EFC4226D3473F001C950E /* version.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = version.h; sourceTree = ""; }; 706EFC4426D3473F001C950E /* eacompilertraits.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = eacompilertraits.h; sourceTree = ""; }; @@ -598,6 +462,26 @@ 706EFD5E26D3473F001C950E /* hashtable.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hashtable.cpp; sourceTree = ""; }; 706EFD5F26D3473F001C950E /* red_black_tree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = red_black_tree.cpp; sourceTree = ""; }; 706EFD6026D3473F001C950E /* fixed_pool.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fixed_pool.cpp; sourceTree = ""; }; + 707789C62881BA81008A51BC /* bc7enc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7enc.cpp; sourceTree = ""; }; + 707789C72881BA81008A51BC /* bc7enc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc7enc.h; sourceTree = ""; }; + 707789C82881BA81008A51BC /* bc7decomp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc7decomp.h; sourceTree = ""; }; + 707789C92881BA81008A51BC /* ert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ert.h; sourceTree = ""; }; + 707789CA2881BA81008A51BC /* rgbcx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; path = rgbcx.cpp; sourceTree = ""; }; + 707789CB2881BA81008A51BC /* rgbcx_table4.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4.h; sourceTree = ""; }; + 707789CC2881BA81008A51BC /* utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = utils.cpp; sourceTree = ""; }; + 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx_table4_small.h; sourceTree = ""; }; + 707789CE2881BA81008A51BC /* ert.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ert.cpp; sourceTree = ""; }; + 707789CF2881BA81008A51BC /* rgbcx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rgbcx.h; sourceTree = ""; }; + 707789D02881BA81008A51BC /* bc7decomp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp.cpp; sourceTree = ""; }; + 707789D12881BA81008A51BC /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 707789D22881BA81008A51BC /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utils.h; sourceTree = ""; }; + 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bc7decomp_ref.cpp; sourceTree = ""; }; + 707789D42881BA81008A51BC /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE; sourceTree = ""; }; + 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rdo_bc_encoder.cpp; sourceTree = ""; }; + 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rdo_bc_encoder.h; sourceTree = ""; }; + 707B2AB22D99BF7A00DD3F0B /* KramThreadPool.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KramThreadPool.h; sourceTree = ""; }; + 707B2AB32D99BF7A00DD3F0B /* KramThreadPool.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramThreadPool.cpp; sourceTree = ""; }; + 707D4C732CC436A000729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; 70871DA727DDDBCC00D0B9E1 /* astcenc_vecmathlib_common_4.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = astcenc_vecmathlib_common_4.h; sourceTree = ""; }; 70871DA827DDDBCC00D0B9E1 /* astcenc_image.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = astcenc_image.cpp; sourceTree = ""; }; 70871DA927DDDBCC00D0B9E1 /* astcenc_find_best_partitioning.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = astcenc_find_best_partitioning.cpp; sourceTree = ""; }; @@ -637,10 +521,39 @@ 708A6A8E2708CE4700BA5410 /* bc6h_encode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc6h_encode.h; sourceTree = ""; }; 708A6A902708CE4700BA5410 /* bc6h_definitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc6h_definitions.h; sourceTree = ""; }; 708A6A922708CE4700BA5410 /* bc6h_utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bc6h_utils.h; sourceTree = ""; }; + 709B8D1C28D7BCAD0081BD1F /* ostream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ostream.h; sourceTree = ""; }; + 709B8D1D28D7BCAD0081BD1F /* format-inl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = 
"format-inl.h"; sourceTree = ""; }; + 709B8D1E28D7BCAD0081BD1F /* ranges.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ranges.h; sourceTree = ""; }; + 709B8D1F28D7BCAD0081BD1F /* xchar.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = xchar.h; sourceTree = ""; }; + 709B8D2028D7BCAD0081BD1F /* core.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = core.h; sourceTree = ""; }; + 709B8D2128D7BCAD0081BD1F /* os.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = os.cpp; sourceTree = ""; }; + 709B8D2228D7BCAD0081BD1F /* format.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = format.cpp; sourceTree = ""; }; + 709B8D2428D7BCAD0081BD1F /* chrono.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chrono.h; sourceTree = ""; }; + 709B8D2528D7BCAD0081BD1F /* os.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = os.h; sourceTree = ""; }; + 709B8D2628D7BCAD0081BD1F /* color.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = color.h; sourceTree = ""; }; + 709B8D2728D7BCAD0081BD1F /* args.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = args.h; sourceTree = ""; }; + 709B8D2828D7BCAD0081BD1F /* printf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = printf.h; sourceTree = ""; }; + 709B8D2928D7BCAD0081BD1F /* compile.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = compile.h; sourceTree = ""; }; + 709B8D2A28D7BCAD0081BD1F /* LICENSE.rst */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = LICENSE.rst; sourceTree = ""; }; + 709B8D2B28D7BCAD0081BD1F /* format.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = format.h; sourceTree = ""; }; + 709B8D2C28D7BCAD0081BD1F /* std.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = std.h; sourceTree = ""; }; + 709B8D4D28D7C15F0081BD1F /* KramFmt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramFmt.h; sourceTree = ""; }; 70A7BD2E27092A1200DBCCF7 /* hdr_encode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hdr_encode.cpp; sourceTree = ""; }; 70A7BD2F27092A1200DBCCF7 /* hdr_encode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hdr_encode.h; sourceTree = ""; }; + 70B563A52C857B360089A64F /* KramZipStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KramZipStream.cpp; sourceTree = ""; }; + 70B563A62C857B360089A64F /* KramZipStream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramZipStream.h; sourceTree = ""; }; 70CDB64E27A1382600A546C1 /* KramDDSHelper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramDDSHelper.h; sourceTree = ""; }; 70CDB64F27A1382600A546C1 /* KramDDSHelper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KramDDSHelper.cpp; sourceTree = ""; }; + 70D222D62AC800AC00B9EA23 /* json11.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = json11.h; sourceTree = ""; }; + 70D222D72AC800AC00B9EA23 /* json11.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = json11.cpp; sourceTree = ""; }; + 70D222DC2AD2132300B9EA23 /* ImmutableString.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ImmutableString.cpp; sourceTree = ""; }; + 
70D222DD2AD2132300B9EA23 /* ImmutableString.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ImmutableString.h; sourceTree = ""; }; + 70D222E22AD22BED00B9EA23 /* BlockedLinearAllocator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BlockedLinearAllocator.cpp; sourceTree = ""; }; + 70D222E32AD22BED00B9EA23 /* BlockedLinearAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BlockedLinearAllocator.h; sourceTree = ""; }; + 70D222E92ADAF25E00B9EA23 /* simdjson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simdjson.h; sourceTree = ""; }; + 70D222EA2ADAF25E00B9EA23 /* simdjson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simdjson.cpp; sourceTree = ""; }; + 70D222F42ADAF78300B9EA23 /* dlmalloc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dlmalloc.cpp; sourceTree = ""; }; + 70D222F72ADAFA1500B9EA23 /* dlmalloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dlmalloc.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -652,14 +565,6 @@ ); runOnlyForDeploymentPostprocessing = 0; }; - 706EF1DC26D166C5001C950E /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 706EF1DD26D166C5001C950E /* libate.tbd in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -669,6 +574,7 @@ 706EEDA826D1583E001C950E /* libkram */, 706ECDDF26D1577A001C950E /* Products */, 706EF12926D159F9001C950E /* Frameworks */, + 707D4C732CC436A000729BE0 /* kram.xcconfig */, ); sourceTree = ""; }; @@ -676,7 +582,6 @@ isa = PBXGroup; children = ( 706ECDDE26D1577A001C950E /* libkram.a */, 
- 706EF1E126D166C5001C950E /* libkram-ios.a */, ); name = Products; sourceTree = ""; @@ -684,8 +589,10 @@ 706EEDA826D1583E001C950E /* libkram */ = { isa = PBXGroup; children = ( + 70D222F32ADAF78300B9EA23 /* allocate */, 708A6A882708CE4700BA5410 /* compressonator */, 706EFC3E26D3473F001C950E /* eastl */, + 709B8D1B28D7BCAD0081BD1F /* fmt */, 706EEDA926D1583E001C950E /* etc2comp */, 706EEDC926D1583E001C950E /* bc7enc */, 706EEDD226D1583E001C950E /* astc-encoder */, @@ -693,8 +600,10 @@ 706EEDFB26D1583E001C950E /* transcoder */, 706EEE1026D1583F001C950E /* miniz */, 706EEE1326D1583F001C950E /* heman */, - 706EEE1626D1583F001C950E /* stb */, 706EEE1826D1583F001C950E /* kram */, + 70D222D52AC800AC00B9EA23 /* json11 */, + 70D222E82ADAF25E00B9EA23 /* simdjson */, + 706EEE1626D1583F001C950E /* stb */, 706EEE3926D1583F001C950E /* squish */, 706EEE4F26D1583F001C950E /* zstd */, 706EEE5326D1583F001C950E /* lodepng */, @@ -744,14 +653,23 @@ 706EEDC926D1583E001C950E /* bc7enc */ = { isa = PBXGroup; children = ( - 706EEDCA26D1583E001C950E /* LICENSE */, - 706EEDCB26D1583E001C950E /* rgbcx.h */, - 706EEDCC26D1583E001C950E /* bc7enc.h */, - 706EEDCD26D1583E001C950E /* bc7decomp.h */, - 706EEDCE26D1583E001C950E /* bc7decomp.cpp */, - 706EEDCF26D1583E001C950E /* README.md */, - 706EEDD026D1583E001C950E /* bc7enc.cpp */, - 706EEDD126D1583E001C950E /* rgbcx_table4.h */, + 707789D32881BA81008A51BC /* bc7decomp_ref.cpp */, + 707789D02881BA81008A51BC /* bc7decomp.cpp */, + 707789C82881BA81008A51BC /* bc7decomp.h */, + 707789C62881BA81008A51BC /* bc7enc.cpp */, + 707789C72881BA81008A51BC /* bc7enc.h */, + 707789CE2881BA81008A51BC /* ert.cpp */, + 707789C92881BA81008A51BC /* ert.h */, + 707789D42881BA81008A51BC /* LICENSE */, + 707789D12881BA81008A51BC /* README.md */, + 707789EF2881BCE2008A51BC /* rdo_bc_encoder.cpp */, + 707789F02881BCE2008A51BC /* rdo_bc_encoder.h */, + 707789CD2881BA81008A51BC /* rgbcx_table4_small.h */, + 707789CB2881BA81008A51BC /* rgbcx_table4.h */, + 
707789CA2881BA81008A51BC /* rgbcx.cpp */, + 707789CF2881BA81008A51BC /* rgbcx.h */, + 707789CC2881BA81008A51BC /* utils.cpp */, + 707789D22881BA81008A51BC /* utils.h */, ); path = bc7enc; sourceTree = ""; @@ -863,21 +781,24 @@ children = ( 70CDB64E27A1382600A546C1 /* KramDDSHelper.h */, 70CDB64F27A1382600A546C1 /* KramDDSHelper.cpp */, + 70B563A62C857B360089A64F /* KramZipStream.h */, + 70B563A52C857B360089A64F /* KramZipStream.cpp */, 706EEE1926D1583F001C950E /* KramZipHelper.h */, 706EEE1E26D1583F001C950E /* KramZipHelper.cpp */, 706EEE2326D1583F001C950E /* KramConfig.h */, + 706EEE3126D1583F001C950E /* KramImageInfo.h */, 706EEE2526D1583F001C950E /* KramImageInfo.cpp */, - 706EEE2626D1583F001C950E /* KramImage.cpp */, + 709B8D4D28D7C15F0081BD1F /* KramFmt.h */, 706EEE2726D1583F001C950E /* KramLib.h */, 706EEE2426D1583F001C950E /* KramLog.h */, 706EEE2826D1583F001C950E /* KramLog.cpp */, 706EEE2926D1583F001C950E /* KramVersion.h */, 706EEE2A26D1583F001C950E /* KramImage.h */, + 706EEE2626D1583F001C950E /* KramImage.cpp */, 706EEE2026D1583F001C950E /* KramSDFMipper.h */, 706EEE2B26D1583F001C950E /* KramSDFMipper.cpp */, 706EEE3026D1583F001C950E /* KTXImage.h */, 706EEE1B26D1583F001C950E /* KTXImage.cpp */, - 706EEE3126D1583F001C950E /* KramImageInfo.h */, 706EEE3226D1583F001C950E /* KramTimer.h */, 706EEE1A26D1583F001C950E /* KramTimer.cpp */, 706EEE3326D1583F001C950E /* KramMmapHelper.h */, @@ -886,13 +807,17 @@ 706EEE3526D1583F001C950E /* Kram.cpp */, 706EEE3626D1583F001C950E /* KramFileHelper.h */, 706EEE2126D1583F001C950E /* KramFileHelper.cpp */, + 706178172DE16211001545E1 /* KramFileIO.h */, + 706178182DE16211001545E1 /* KramFileIO.cpp */, 706EEE3726D1583F001C950E /* KramMipper.h */, 706EEE1C26D1583F001C950E /* KramMipper.cpp */, - 706EEE1D26D1583F001C950E /* _clang-format */, 706EEE2D26D1583F001C950E /* win_mmap.h */, - 706EEE2226D1583F001C950E /* sse2neon.h */, - 706EEE3426D1583F001C950E /* float4a.h */, - 706EEE2F26D1583F001C950E /* float4a.cpp */, + 
70D222E32AD22BED00B9EA23 /* BlockedLinearAllocator.h */, + 70D222E22AD22BED00B9EA23 /* BlockedLinearAllocator.cpp */, + 70D222DD2AD2132300B9EA23 /* ImmutableString.h */, + 70D222DC2AD2132300B9EA23 /* ImmutableString.cpp */, + 707B2AB22D99BF7A00DD3F0B /* KramThreadPool.h */, + 707B2AB32D99BF7A00DD3F0B /* KramThreadPool.cpp */, 706EEE3826D1583F001C950E /* TaskSystem.h */, 706EEE1F26D1583F001C950E /* TaskSystem.cpp */, ); @@ -1185,6 +1110,56 @@ path = bc6h; sourceTree = ""; }; + 709B8D1B28D7BCAD0081BD1F /* fmt */ = { + isa = PBXGroup; + children = ( + 709B8D1C28D7BCAD0081BD1F /* ostream.h */, + 709B8D2B28D7BCAD0081BD1F /* format.h */, + 709B8D1D28D7BCAD0081BD1F /* format-inl.h */, + 709B8D2228D7BCAD0081BD1F /* format.cpp */, + 709B8D1E28D7BCAD0081BD1F /* ranges.h */, + 709B8D1F28D7BCAD0081BD1F /* xchar.h */, + 709B8D2028D7BCAD0081BD1F /* core.h */, + 709B8D2428D7BCAD0081BD1F /* chrono.h */, + 709B8D2528D7BCAD0081BD1F /* os.h */, + 709B8D2128D7BCAD0081BD1F /* os.cpp */, + 709B8D2628D7BCAD0081BD1F /* color.h */, + 709B8D2728D7BCAD0081BD1F /* args.h */, + 709B8D2828D7BCAD0081BD1F /* printf.h */, + 709B8D2928D7BCAD0081BD1F /* compile.h */, + 709B8D2A28D7BCAD0081BD1F /* LICENSE.rst */, + 709B8D2C28D7BCAD0081BD1F /* std.h */, + ); + path = fmt; + sourceTree = ""; + }; + 70D222D52AC800AC00B9EA23 /* json11 */ = { + isa = PBXGroup; + children = ( + 70D222D62AC800AC00B9EA23 /* json11.h */, + 70D222D72AC800AC00B9EA23 /* json11.cpp */, + ); + path = json11; + sourceTree = ""; + }; + 70D222E82ADAF25E00B9EA23 /* simdjson */ = { + isa = PBXGroup; + children = ( + 70D222E92ADAF25E00B9EA23 /* simdjson.h */, + 70D222EA2ADAF25E00B9EA23 /* simdjson.cpp */, + ); + path = simdjson; + sourceTree = ""; + }; + 70D222F32ADAF78300B9EA23 /* allocate */ = { + isa = PBXGroup; + children = ( + 70D222F72ADAFA1500B9EA23 /* dlmalloc.h */, + 70D222F42ADAF78300B9EA23 /* dlmalloc.cpp */, + ); + path = allocate; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ 
-1194,6 +1169,7 @@ files = ( 706EEFD126D15984001C950E /* EtcErrorMetric.h in Headers */, 706EEFD226D15984001C950E /* EtcColor.h in Headers */, + 709B8D3D28D7BCAD0081BD1F /* chrono.h in Headers */, 706EEFD326D15984001C950E /* EtcDifferentialTrys.h in Headers */, 706EEFD426D15984001C950E /* EtcBlock4x4Encoding_RGB8.h in Headers */, 706EEFD526D15984001C950E /* EtcConfig.h in Headers */, @@ -1205,31 +1181,36 @@ 706EEFDA26D15984001C950E /* EtcBlock4x4EncodingBits.h in Headers */, 706EEFDB26D15984001C950E /* EtcBlock4x4Encoding_RGB8A1.h in Headers */, 706EEFDC26D15984001C950E /* EtcBlock4x4.h in Headers */, + 707789E72881BA81008A51BC /* rgbcx.h in Headers */, 706EEFDD26D15984001C950E /* Etc.h in Headers */, + 707789D72881BA81008A51BC /* bc7enc.h in Headers */, 706EEFDE26D15984001C950E /* EtcImage.h in Headers */, + 709B8D4B28D7BCAD0081BD1F /* std.h in Headers */, + 707B2AB42D99BF7A00DD3F0B /* KramThreadPool.h in Headers */, 70CDB65027A1382700A546C1 /* KramDDSHelper.h in Headers */, + 709B8D4328D7BCAD0081BD1F /* args.h in Headers */, 708A6A9C2708CE4700BA5410 /* bc6h_encode.h in Headers */, 706EEFDF26D15984001C950E /* EtcBlock4x4Encoding_ETC1.h in Headers */, 706EEFE026D15984001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */, 706EEFE126D15984001C950E /* EtcColorFloatRGBA.h in Headers */, 706EEFE226D15984001C950E /* EtcBlock4x4Encoding.h in Headers */, - 706EEFE326D15984001C950E /* rgbcx.h in Headers */, - 706EEFE426D15984001C950E /* bc7enc.h in Headers */, - 706EEFE526D15984001C950E /* bc7decomp.h in Headers */, - 706EEFE626D15984001C950E /* rgbcx_table4.h in Headers */, 706EEFF226D15984001C950E /* ateencoder.h in Headers */, 706EEFF326D15984001C950E /* basisu_transcoder.h in Headers */, 70A7BD3227092A1200DBCCF7 /* hdr_encode.h in Headers */, + 709B8D4728D7BCAD0081BD1F /* compile.h in Headers */, 708A6AA02708CE4700BA5410 /* bc6h_definitions.h in Headers */, 706EEFF426D15984001C950E /* basisu_containers.h in Headers */, 70871DD527DDDBCD00D0B9E1 /* astcenc.h in Headers 
*/, + 709B8D4528D7BCAD0081BD1F /* printf.h in Headers */, 706EEFF526D15985001C950E /* basisu_containers_impl.h in Headers */, + 707789EB2881BA81008A51BC /* utils.h in Headers */, 706EEFF626D15985001C950E /* basisu_transcoder_internal.h in Headers */, 70871DF927DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */, 70871DFB27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */, 706EEFF726D15985001C950E /* basisu_global_selector_cb.h in Headers */, 706EEFF826D15985001C950E /* basisu_transcoder_uastc.h in Headers */, 706EEFF926D15985001C950E /* basisu_global_selector_palette.h in Headers */, + 707789E32881BA81008A51BC /* rgbcx_table4_small.h in Headers */, 706EEFFA26D15985001C950E /* basisu.h in Headers */, 706EEFFB26D15985001C950E /* basisu_file_headers.h in Headers */, 706EEFFC26D15985001C950E /* miniz.h in Headers */, @@ -1237,8 +1218,10 @@ 706EEFFE26D15985001C950E /* stb_rect_pack.h in Headers */, 706EEFFF26D15985001C950E /* KramZipHelper.h in Headers */, 706EF00026D15985001C950E /* KramSDFMipper.h in Headers */, - 706EF00126D15985001C950E /* sse2neon.h in Headers */, + 70D222E62AD22BED00B9EA23 /* BlockedLinearAllocator.h in Headers */, + 70B563A92C857B360089A64F /* KramZipStream.h in Headers */, 70871DF127DDDBCD00D0B9E1 /* astcenc_mathlib.h in Headers */, + 709B8D3128D7BCAD0081BD1F /* ranges.h in Headers */, 706EF00226D15985001C950E /* KramConfig.h in Headers */, 706EF00326D15985001C950E /* KramLog.h in Headers */, 706EF00426D15985001C950E /* KramLib.h in Headers */, @@ -1246,121 +1229,49 @@ 706EF00626D15985001C950E /* KramImage.h in Headers */, 706EF00726D15985001C950E /* win_mmap.h in Headers */, 70871DDD27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */, + 709B8D4F28D7C15F0081BD1F /* KramFmt.h in Headers */, + 707789D92881BA81008A51BC /* bc7decomp.h in Headers */, 706EF00826D15985001C950E /* Kram.h in Headers */, + 70D222E02AD2132300B9EA23 /* ImmutableString.h in Headers */, + 70D222EB2ADAF25E00B9EA23 /* simdjson.h in Headers */, 
70871DED27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */, + 707789DB2881BA81008A51BC /* ert.h in Headers */, 706EF00926D15985001C950E /* KTXImage.h in Headers */, 706EF00A26D15985001C950E /* KramImageInfo.h in Headers */, + 707789DF2881BA81008A51BC /* rgbcx_table4.h in Headers */, 70871DF727DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */, 706EF00B26D15985001C950E /* KramTimer.h in Headers */, 706EF00C26D15985001C950E /* KramMmapHelper.h in Headers */, - 706EF00D26D15985001C950E /* float4a.h in Headers */, 706EF00E26D15985001C950E /* KramFileHelper.h in Headers */, + 709B8D3F28D7BCAD0081BD1F /* os.h in Headers */, 706EF00F26D15985001C950E /* KramMipper.h in Headers */, 706EF01026D15985001C950E /* TaskSystem.h in Headers */, 706EF01126D15985001C950E /* squish.h in Headers */, 706EF01226D15985001C950E /* clusterfit.h in Headers */, + 709B8D3528D7BCAD0081BD1F /* core.h in Headers */, 706EF01326D15985001C950E /* colourfit.h in Headers */, 70871DFD27DDDBCD00D0B9E1 /* astcenc_vecmathlib.h in Headers */, 706EF01426D15985001C950E /* alpha.h in Headers */, + 709B8D4128D7BCAD0081BD1F /* color.h in Headers */, 708A6A982708CE4700BA5410 /* bc6h_decode.h in Headers */, 706EF01526D15985001C950E /* singlecolourfit.h in Headers */, 706EF01626D15985001C950E /* maths.h in Headers */, + 707789F32881BCE2008A51BC /* rdo_bc_encoder.h in Headers */, + 70D222D82AC800AC00B9EA23 /* json11.h in Headers */, 706EF01726D15985001C950E /* colourset.h in Headers */, 708A6AA42708CE4700BA5410 /* bc6h_utils.h in Headers */, 706EF01826D15985001C950E /* colourblock.h in Headers */, 706EF01926D15985001C950E /* rangefit.h in Headers */, 706EF01A26D15985001C950E /* zstd.h in Headers */, 70871DF327DDDBCD00D0B9E1 /* astcenc_internal.h in Headers */, + 709B8D2F28D7BCAD0081BD1F /* format-inl.h in Headers */, + 709B8D2D28D7BCAD0081BD1F /* ostream.h in Headers */, 706EF01B26D15985001C950E /* lodepng.h in Headers */, + 709B8D4928D7BCAD0081BD1F /* format.h in Headers */, + 
706178192DE16211001545E1 /* KramFileIO.h in Headers */, + 70D222F82ADAFA1500B9EA23 /* dlmalloc.h in Headers */, 706EF01C26D15985001C950E /* tmpfileplus.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - 706EF14A26D166C5001C950E /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - 706EF14B26D166C5001C950E /* EtcErrorMetric.h in Headers */, - 706EF14C26D166C5001C950E /* EtcColor.h in Headers */, - 706EF14D26D166C5001C950E /* EtcDifferentialTrys.h in Headers */, - 706EF14E26D166C5001C950E /* EtcBlock4x4Encoding_RGB8.h in Headers */, - 706EF14F26D166C5001C950E /* EtcConfig.h in Headers */, - 70871DCA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_common_4.h in Headers */, - 706EF15026D166C5001C950E /* EtcBlock4x4Encoding_R11.h in Headers */, - 706EF15126D166C5001C950E /* EtcBlock4x4Encoding_RG11.h in Headers */, - 706EF15226D166C5001C950E /* EtcMath.h in Headers */, - 706EF15326D166C5001C950E /* EtcIndividualTrys.h in Headers */, - 706EF15426D166C5001C950E /* EtcBlock4x4EncodingBits.h in Headers */, - 706EF15526D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.h in Headers */, - 706EF15626D166C5001C950E /* EtcBlock4x4.h in Headers */, - 706EF15726D166C5001C950E /* Etc.h in Headers */, - 706EF15826D166C5001C950E /* EtcImage.h in Headers */, - 70CDB65127A1382700A546C1 /* KramDDSHelper.h in Headers */, - 708A6A9D2708CE4700BA5410 /* bc6h_encode.h in Headers */, - 706EF15926D166C5001C950E /* EtcBlock4x4Encoding_ETC1.h in Headers */, - 706EF15A26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.h in Headers */, - 706EF15B26D166C5001C950E /* EtcColorFloatRGBA.h in Headers */, - 706EF15C26D166C5001C950E /* EtcBlock4x4Encoding.h in Headers */, - 706EF15D26D166C5001C950E /* rgbcx.h in Headers */, - 706EF15E26D166C5001C950E /* bc7enc.h in Headers */, - 706EF15F26D166C5001C950E /* bc7decomp.h in Headers */, - 706EF16026D166C5001C950E /* rgbcx_table4.h in Headers */, - 706EF16C26D166C5001C950E /* ateencoder.h in Headers */, - 
706EF16D26D166C5001C950E /* basisu_transcoder.h in Headers */, - 70A7BD3327092A1200DBCCF7 /* hdr_encode.h in Headers */, - 708A6AA12708CE4700BA5410 /* bc6h_definitions.h in Headers */, - 706EF16E26D166C5001C950E /* basisu_containers.h in Headers */, - 70871DD627DDDBCD00D0B9E1 /* astcenc.h in Headers */, - 706EF16F26D166C5001C950E /* basisu_containers_impl.h in Headers */, - 706EF17026D166C5001C950E /* basisu_transcoder_internal.h in Headers */, - 70871DFA27DDDBCD00D0B9E1 /* astcenc_vecmathlib_avx2_8.h in Headers */, - 70871DFC27DDDBCD00D0B9E1 /* astcenc_vecmathlib_none_4.h in Headers */, - 706EF17126D166C5001C950E /* basisu_global_selector_cb.h in Headers */, - 706EF17226D166C5001C950E /* basisu_transcoder_uastc.h in Headers */, - 706EF17326D166C5001C950E /* basisu_global_selector_palette.h in Headers */, - 706EF17426D166C5001C950E /* basisu.h in Headers */, - 706EF17526D166C5001C950E /* basisu_file_headers.h in Headers */, - 706EF17626D166C5001C950E /* miniz.h in Headers */, - 706EF17726D166C5001C950E /* hedistance.h in Headers */, - 706EF17826D166C5001C950E /* stb_rect_pack.h in Headers */, - 706EF17926D166C5001C950E /* KramZipHelper.h in Headers */, - 706EF17A26D166C5001C950E /* KramSDFMipper.h in Headers */, - 706EF17B26D166C5001C950E /* sse2neon.h in Headers */, - 70871DF227DDDBCD00D0B9E1 /* astcenc_mathlib.h in Headers */, - 706EF17C26D166C5001C950E /* KramConfig.h in Headers */, - 706EF17D26D166C5001C950E /* KramLog.h in Headers */, - 706EF17E26D166C5001C950E /* KramLib.h in Headers */, - 706EF17F26D166C5001C950E /* KramVersion.h in Headers */, - 706EF18026D166C5001C950E /* KramImage.h in Headers */, - 706EF18126D166C5001C950E /* win_mmap.h in Headers */, - 70871DDE27DDDBCD00D0B9E1 /* astcenc_vecmathlib_sse_4.h in Headers */, - 706EF18226D166C5001C950E /* Kram.h in Headers */, - 70871DEE27DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.h in Headers */, - 706EF18326D166C5001C950E /* KTXImage.h in Headers */, - 706EF18426D166C5001C950E /* KramImageInfo.h in Headers 
*/, - 70871DF827DDDBCD00D0B9E1 /* astcenc_vecmathlib_neon_4.h in Headers */, - 706EF18526D166C5001C950E /* KramTimer.h in Headers */, - 706EF18626D166C5001C950E /* KramMmapHelper.h in Headers */, - 706EF18726D166C5001C950E /* float4a.h in Headers */, - 706EF18826D166C5001C950E /* KramFileHelper.h in Headers */, - 706EF18926D166C5001C950E /* KramMipper.h in Headers */, - 706EF18A26D166C5001C950E /* TaskSystem.h in Headers */, - 706EF18B26D166C5001C950E /* squish.h in Headers */, - 706EF18C26D166C5001C950E /* clusterfit.h in Headers */, - 706EF18D26D166C5001C950E /* colourfit.h in Headers */, - 70871DFE27DDDBCD00D0B9E1 /* astcenc_vecmathlib.h in Headers */, - 706EF18E26D166C5001C950E /* alpha.h in Headers */, - 708A6A992708CE4700BA5410 /* bc6h_decode.h in Headers */, - 706EF18F26D166C5001C950E /* singlecolourfit.h in Headers */, - 706EF19026D166C5001C950E /* maths.h in Headers */, - 706EF19126D166C5001C950E /* colourset.h in Headers */, - 708A6AA52708CE4700BA5410 /* bc6h_utils.h in Headers */, - 706EF19226D166C5001C950E /* colourblock.h in Headers */, - 706EF19326D166C5001C950E /* rangefit.h in Headers */, - 706EF19426D166C5001C950E /* zstd.h in Headers */, - 70871DF427DDDBCD00D0B9E1 /* astcenc_internal.h in Headers */, - 706EF19526D166C5001C950E /* lodepng.h in Headers */, - 706EF19626D166C5001C950E /* tmpfileplus.h in Headers */, + 709B8D3328D7BCAD0081BD1F /* xchar.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -1384,30 +1295,14 @@ productReference = 706ECDDE26D1577A001C950E /* libkram.a */; productType = "com.apple.product-type.library.static"; }; - 706EF14926D166C5001C950E /* kram-ios */ = { - isa = PBXNativeTarget; - buildConfigurationList = 706EF1DE26D166C5001C950E /* Build configuration list for PBXNativeTarget "kram-ios" */; - buildPhases = ( - 706EF14A26D166C5001C950E /* Headers */, - 706EF19726D166C5001C950E /* Sources */, - 706EF1DC26D166C5001C950E /* Frameworks */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "kram-ios"; 
- productName = kram; - productReference = 706EF1E126D166C5001C950E /* libkram-ios.a */; - productType = "com.apple.product-type.library.static"; - }; /* End PBXNativeTarget section */ /* Begin PBXProject section */ 706ECDD626D1577A001C950E /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 1240; + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1500; TargetAttributes = { 706ECDDD26D1577A001C950E = { CreatedOnToolsVersion = 12.4; @@ -1421,7 +1316,6 @@ knownRegions = ( en, Base, - English, english, ); mainGroup = 706ECDD526D1577A001C950E; @@ -1430,7 +1324,6 @@ projectRoot = ""; targets = ( 706ECDDD26D1577A001C950E /* kram */, - 706EF14926D166C5001C950E /* kram-ios */, ); }; /* End PBXProject section */ @@ -1440,31 +1333,36 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 702E0DB62CA10BC100B652B7 /* astcenc_mathlib.cpp in Sources */, 70871DD727DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */, + 70D222F52ADAF78300B9EA23 /* dlmalloc.cpp in Sources */, + 707789E52881BA81008A51BC /* ert.cpp in Sources */, 70871E0327DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */, 70871DD127DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */, 70871DDF27DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */, 706EF26426D17DCC001C950E /* ateencoder.mm in Sources */, + 707789ED2881BA81008A51BC /* bc7decomp_ref.cpp in Sources */, 706EEF7F26D1595D001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */, 70871DCD27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */, 70CDB65227A1382700A546C1 /* KramDDSHelper.cpp in Sources */, + 70D222ED2ADAF25E00B9EA23 /* simdjson.cpp in Sources */, 706EEF8026D1595D001C950E /* EtcImage.cpp in Sources */, 70871DEB27DDDBCD00D0B9E1 /* astcenc_block_sizes.cpp in Sources */, + 70D222E42AD22BED00B9EA23 /* BlockedLinearAllocator.cpp in Sources */, 706EEF8126D1595D001C950E /* EtcDifferentialTrys.cpp in Sources */, 706EEF8226D1595D001C950E /* 
EtcMath.cpp in Sources */, 706EEF8326D1595D001C950E /* EtcBlock4x4Encoding_RGBA8.cpp in Sources */, 706EEF8426D1595D001C950E /* EtcBlock4x4Encoding_RG11.cpp in Sources */, 706EEF8526D1595D001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp in Sources */, 706EEF8726D1595D001C950E /* EtcIndividualTrys.cpp in Sources */, + 707789DD2881BA81008A51BC /* rgbcx.cpp in Sources */, 706EEF8826D1595D001C950E /* EtcBlock4x4Encoding_R11.cpp in Sources */, + 707789F12881BCE2008A51BC /* rdo_bc_encoder.cpp in Sources */, 70871DF527DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */, 706EEF8926D1595D001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */, 706EEF8A26D1595D001C950E /* EtcBlock4x4Encoding.cpp in Sources */, 706EEF8B26D1595D001C950E /* EtcBlock4x4.cpp in Sources */, 70871DDB27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */, - 706EEF8C26D1595D001C950E /* bc7decomp.cpp in Sources */, - 70871DE127DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */, - 706EEF8D26D1595D001C950E /* bc7enc.cpp in Sources */, 708A6A9A2708CE4700BA5410 /* bc6h_encode.cpp in Sources */, 70A7BD3027092A1200DBCCF7 /* hdr_encode.cpp in Sources */, 706EFF7726D34740001C950E /* string.cpp in Sources */, @@ -1476,7 +1374,9 @@ 70871DE327DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */, 70871E0727DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */, 70871E0527DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */, + 707789D52881BA81008A51BC /* bc7enc.cpp in Sources */, 706EFF7F26D34740001C950E /* intrusive_list.cpp in Sources */, + 707789E92881BA81008A51BC /* bc7decomp.cpp in Sources */, 706EEFA826D1595D001C950E /* miniz.cpp in Sources */, 70871DE527DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */, 706EEFA926D1595D001C950E /* hedistance.cpp in Sources */, @@ -1494,13 +1394,17 @@ 706EEFB226D1595D001C950E /* KramLog.cpp in Sources */, 706EEFB326D1595D001C950E /* KramSDFMipper.cpp in Sources */, 706EEFB426D1595D001C950E /* KramMmapHelper.cpp in Sources 
*/, + 709B8D3928D7BCAD0081BD1F /* format.cpp in Sources */, + 70D222DE2AD2132300B9EA23 /* ImmutableString.cpp in Sources */, + 7061781A2DE16211001545E1 /* KramFileIO.cpp in Sources */, 70871DCB27DDDBCD00D0B9E1 /* astcenc_image.cpp in Sources */, - 706EEFB526D1595D001C950E /* float4a.cpp in Sources */, 706EFF7326D34740001C950E /* thread_support.cpp in Sources */, 706EEFB626D1595D001C950E /* Kram.cpp in Sources */, 706EEFB726D1595D001C950E /* squish.cpp in Sources */, 706EEFB826D1595D001C950E /* colourset.cpp in Sources */, + 707B2AB52D99BF7A00DD3F0B /* KramThreadPool.cpp in Sources */, 70871DD327DDDBCD00D0B9E1 /* astcenc_partition_tables.cpp in Sources */, + 709B8D3728D7BCAD0081BD1F /* os.cpp in Sources */, 706EFF8126D34740001C950E /* hashtable.cpp in Sources */, 70871DEF27DDDBCD00D0B9E1 /* astcenc_weight_align.cpp in Sources */, 70871DD927DDDBCD00D0B9E1 /* astcenc_compute_variance.cpp in Sources */, @@ -1510,113 +1414,31 @@ 706EEFBD26D1595D001C950E /* colourblock.cpp in Sources */, 706EEFBE26D1595E001C950E /* colourfit.cpp in Sources */, 70871DFF27DDDBCD00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp in Sources */, + 70B563A72C857B360089A64F /* KramZipStream.cpp in Sources */, 70871E0927DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp in Sources */, 70871DCF27DDDBCD00D0B9E1 /* astcenc_symbolic_physical.cpp in Sources */, - 706EFF7926D34740001C950E /* allocator_eastl.cpp in Sources */, + 70D222DA2AC800AC00B9EA23 /* json11.cpp in Sources */, 706EEFC026D1595E001C950E /* maths.cpp in Sources */, 706EEFC126D1595E001C950E /* singlecolourfit.cpp in Sources */, 706EEFC226D1595E001C950E /* zstd.cpp in Sources */, 706EEFC326D1595E001C950E /* zstddeclib.cpp in Sources */, 706EEFC426D1595E001C950E /* lodepng.cpp in Sources */, + 707789E12881BA81008A51BC /* utils.cpp in Sources */, 706EEFC526D1595E001C950E /* tmpfileplus.cpp in Sources */, 70871E0127DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; - 
706EF19726D166C5001C950E /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 70871DD827DDDBCD00D0B9E1 /* astcenc_quantization.cpp in Sources */, - 70871E0427DDDBCD00D0B9E1 /* astcenc_color_unquantize.cpp in Sources */, - 70871DD227DDDBCD00D0B9E1 /* astcenc_averages_and_directions.cpp in Sources */, - 70871DE027DDDBCD00D0B9E1 /* astcenc_mathlib_softfloat.cpp in Sources */, - 706EFC2426D1C39B001C950E /* ateencoder.mm in Sources */, - 706EF19826D166C5001C950E /* EtcBlock4x4Encoding_RGB8.cpp in Sources */, - 70871DCE27DDDBCD00D0B9E1 /* astcenc_find_best_partitioning.cpp in Sources */, - 70CDB65327A1382700A546C1 /* KramDDSHelper.cpp in Sources */, - 706EF19926D166C5001C950E /* EtcImage.cpp in Sources */, - 70871DEC27DDDBCD00D0B9E1 /* astcenc_block_sizes.cpp in Sources */, - 706EF19A26D166C5001C950E /* EtcDifferentialTrys.cpp in Sources */, - 706EF19B26D166C5001C950E /* EtcMath.cpp in Sources */, - 706EF19C26D166C5001C950E /* EtcBlock4x4Encoding_RGBA8.cpp in Sources */, - 706EF19D26D166C5001C950E /* EtcBlock4x4Encoding_RG11.cpp in Sources */, - 706EF19E26D166C5001C950E /* EtcBlock4x4Encoding_RGB8A1.cpp in Sources */, - 706EF19F26D166C5001C950E /* EtcIndividualTrys.cpp in Sources */, - 706EF1A026D166C5001C950E /* EtcBlock4x4Encoding_R11.cpp in Sources */, - 70871DF627DDDBCD00D0B9E1 /* astcenc_color_quantize.cpp in Sources */, - 706EF1A126D166C5001C950E /* EtcBlock4x4Encoding_ETC1.cpp in Sources */, - 706EF1A226D166C5001C950E /* EtcBlock4x4Encoding.cpp in Sources */, - 706EF1A326D166C5001C950E /* EtcBlock4x4.cpp in Sources */, - 70871DDC27DDDBCD00D0B9E1 /* astcenc_percentile_tables.cpp in Sources */, - 706EF1A426D166C5001C950E /* bc7decomp.cpp in Sources */, - 70871DE227DDDBCD00D0B9E1 /* astcenc_mathlib.cpp in Sources */, - 706EF1A526D166C5001C950E /* bc7enc.cpp in Sources */, - 708A6A9B2708CE4700BA5410 /* bc6h_encode.cpp in Sources */, - 70A7BD3127092A1200DBCCF7 /* hdr_encode.cpp in Sources */, - 706EFF7826D34740001C950E /* 
string.cpp in Sources */, - 708A6A972708CE4700BA5410 /* bc6h_decode.cpp in Sources */, - 706EFF7626D34740001C950E /* assert.cpp in Sources */, - 706EFF8626D34740001C950E /* fixed_pool.cpp in Sources */, - 706EF1BF26D166C5001C950E /* basisu_transcoder.cpp in Sources */, - 706EFF8426D34740001C950E /* red_black_tree.cpp in Sources */, - 70871DE427DDDBCD00D0B9E1 /* astcenc_decompress_symbolic.cpp in Sources */, - 70871E0827DDDBCD00D0B9E1 /* astcenc_diagnostic_trace.cpp in Sources */, - 70871E0627DDDBCD00D0B9E1 /* astcenc_platform_isa_detection.cpp in Sources */, - 706EFF8026D34740001C950E /* intrusive_list.cpp in Sources */, - 706EF1C026D166C5001C950E /* miniz.cpp in Sources */, - 70871DE627DDDBCD00D0B9E1 /* astcenc_compress_symbolic.cpp in Sources */, - 706EF1C126D166C5001C950E /* hedistance.cpp in Sources */, - 706EF1C226D166C5001C950E /* KramTimer.cpp in Sources */, - 70871DE827DDDBCD00D0B9E1 /* astcenc_entry.cpp in Sources */, - 706EF1C326D166C5001C950E /* KTXImage.cpp in Sources */, - 706EF1C426D166C5001C950E /* KramMipper.cpp in Sources */, - 706EF1C526D166C5001C950E /* KramZipHelper.cpp in Sources */, - 706EF1C626D166C5001C950E /* TaskSystem.cpp in Sources */, - 706EF1C726D166C5001C950E /* KramFileHelper.cpp in Sources */, - 706EFF7C26D34740001C950E /* numeric_limits.cpp in Sources */, - 706EF1C826D166C5001C950E /* KramImageInfo.cpp in Sources */, - 70871DEA27DDDBCD00D0B9E1 /* astcenc_integer_sequence.cpp in Sources */, - 706EF1C926D166C5001C950E /* KramImage.cpp in Sources */, - 706EF1CA26D166C5001C950E /* KramLog.cpp in Sources */, - 706EF1CB26D166C5001C950E /* KramSDFMipper.cpp in Sources */, - 706EF1CC26D166C5001C950E /* KramMmapHelper.cpp in Sources */, - 70871DCC27DDDBCD00D0B9E1 /* astcenc_image.cpp in Sources */, - 706EF1CD26D166C5001C950E /* float4a.cpp in Sources */, - 706EFF7426D34740001C950E /* thread_support.cpp in Sources */, - 706EF1CE26D166C5001C950E /* Kram.cpp in Sources */, - 706EF1CF26D166C5001C950E /* squish.cpp in Sources */, - 
706EF1D026D166C5001C950E /* colourset.cpp in Sources */, - 70871DD427DDDBCD00D0B9E1 /* astcenc_partition_tables.cpp in Sources */, - 706EFF8226D34740001C950E /* hashtable.cpp in Sources */, - 70871DF027DDDBCD00D0B9E1 /* astcenc_weight_align.cpp in Sources */, - 70871DDA27DDDBCD00D0B9E1 /* astcenc_compute_variance.cpp in Sources */, - 706EF1D126D166C5001C950E /* clusterfit.cpp in Sources */, - 706EF1D226D166C5001C950E /* rangefit.cpp in Sources */, - 706EF1D326D166C5001C950E /* alpha.cpp in Sources */, - 706EF1D426D166C5001C950E /* colourblock.cpp in Sources */, - 706EF1D526D166C5001C950E /* colourfit.cpp in Sources */, - 70871E0027DDDBCD00D0B9E1 /* astcenc_pick_best_endpoint_format.cpp in Sources */, - 70871E0A27DDDBCD00D0B9E1 /* astcenc_ideal_endpoints_and_weights.cpp in Sources */, - 70871DD027DDDBCD00D0B9E1 /* astcenc_symbolic_physical.cpp in Sources */, - 706EFF7A26D34740001C950E /* allocator_eastl.cpp in Sources */, - 706EF1D626D166C5001C950E /* maths.cpp in Sources */, - 706EF1D726D166C5001C950E /* singlecolourfit.cpp in Sources */, - 706EF1D826D166C5001C950E /* zstd.cpp in Sources */, - 706EF1D926D166C5001C950E /* zstddeclib.cpp in Sources */, - 706EF1DA26D166C5001C950E /* lodepng.cpp in Sources */, - 706EF1DB26D166C5001C950E /* tmpfileplus.cpp in Sources */, - 70871E0227DDDBCD00D0B9E1 /* astcenc_weight_quant_xfer_tables.cpp in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; /* End PBXSourcesBuildPhase section */ /* Begin XCBuildConfiguration section */ 706ECDE726D1577A001C950E /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C732CC436A000729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -1627,12 +1449,14 @@ CLANG_WARN_ASSIGN_ENUM = YES; 
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = NO; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -1650,19 +1474,22 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; + GCC_INCREASE_PRECOMPILED_HEADER_SHARING = YES; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramPrefix.h"; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + "GCC_WARN_64_TO_32_BIT_CONVERSION[arch=*64]" = NO; GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; @@ -1674,33 +1501,38 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ""; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; - MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; - MTL_FAST_MATH = YES; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/eastl/include", + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; ONLY_ACTIVE_ARCH = YES; OTHER_CFLAGS = ( + "$(inherited)", 
"-DCOMPILE_ASTCENC=1", "-DCOMPILE_ATE=1", "-DCOMPILE_ETCENC=1", "-DCOMPILE_SQUISH=1", "-DCOMPILE_BCENC=1", - "-DCOMPILE_EASTL=0", "-DCOMPILE_COMP=1", - "-DCOMPILE_BASIS=1", - "-include", - KramConfig.h, + "-DCOMPILE_BASIS=0", + "-DCOMPILE_EASTL=0", ); SDKROOT = macosx; - USER_HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; + USER_HEADER_SEARCH_PATHS = ""; + XROS_DEPLOYMENT_TARGET = 2.0; }; name = Debug; }; 706ECDE826D1577A001C950E /* Release */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C732CC436A000729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -1711,12 +1543,14 @@ CLANG_WARN_ASSIGN_ENUM = YES; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = NO; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_CXX0X_EXTENSIONS = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -1734,13 +1568,20 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_ENABLE_CPP_EXCEPTIONS = NO; GCC_ENABLE_CPP_RTTI = NO; + GCC_INCREASE_PRECOMPILED_HEADER_SHARING = YES; GCC_NO_COMMON_BLOCKS = YES; + GCC_PRECOMPILE_PREFIX_HEADER = YES; + GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramPrefix.h"; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + 
"GCC_WARN_64_TO_32_BIT_CONVERSION[arch=*64]" = NO; GCC_WARN_ABOUT_MISSING_NEWLINE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; @@ -1752,97 +1593,63 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ""; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; - MTL_ENABLE_DEBUG_INFO = NO; - MTL_FAST_MATH = YES; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/eastl/include", + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; OTHER_CFLAGS = ( + "$(inherited)", "-DCOMPILE_ASTCENC=1", "-DCOMPILE_ATE=1", "-DCOMPILE_ETCENC=1", "-DCOMPILE_SQUISH=1", "-DCOMPILE_BCENC=1", - "-DCOMPILE_EASTL=0", "-DCOMPILE_COMP=1", - "-DCOMPILE_BASIS=1", - "-include", - KramConfig.h, + "-DCOMPILE_BASIS=0", + "-DCOMPILE_EASTL=0", ); SDKROOT = macosx; - USER_HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; + USER_HEADER_SEARCH_PATHS = ""; + XROS_DEPLOYMENT_TARGET = 2.0; }; name = Release; }; 706ECDEA26D1577A001C950E /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + ALLOW_TARGET_PLATFORM_SPECIALIZATION = YES; CLANG_WARN_OBJC_EXPLICIT_OWNERSHIP_TYPE = YES; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; - CLANG_X86_VECTOR_INSTRUCTIONS = avx; CODE_SIGN_STYLE = Automatic; EXECUTABLE_PREFIX = lib; - GCC_PRECOMPILE_PREFIX_HEADER = NO; - GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramConfig.h"; PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = Automatic; SKIP_INSTALL = YES; + SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator"; + SUPPORTS_MACCATALYST = NO; SYSTEM_HEADER_SEARCH_PATHS = ""; + TARGETED_DEVICE_FAMILY = "1,2,7"; }; name = Debug; }; 706ECDEB26D1577A001C950E /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + ALLOW_TARGET_PLATFORM_SPECIALIZATION = YES; 
CLANG_WARN_OBJC_EXPLICIT_OWNERSHIP_TYPE = YES; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; - CLANG_X86_VECTOR_INSTRUCTIONS = avx; CODE_SIGN_STYLE = Automatic; - DEAD_CODE_STRIPPING = NO; EXECUTABLE_PREFIX = lib; - GCC_PRECOMPILE_PREFIX_HEADER = NO; - GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramConfig.h"; PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = Automatic; SKIP_INSTALL = YES; + SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator"; + SUPPORTS_MACCATALYST = NO; SYSTEM_HEADER_SEARCH_PATHS = ""; - }; - name = Release; - }; - 706EF1DF26D166C5001C950E /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ARCHS = arm64; - CLANG_WARN_DOCUMENTATION_COMMENTS = NO; - CLANG_X86_VECTOR_INSTRUCTIONS = default; - CODE_SIGN_STYLE = Automatic; - EXECUTABLE_PREFIX = lib; - GCC_PRECOMPILE_PREFIX_HEADER = NO; - GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramConfig.h"; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - PRODUCT_NAME = "$(TARGET_NAME)"; - SDKROOT = iphoneos; - SKIP_INSTALL = YES; - SUPPORTED_PLATFORMS = "iphonesimulator iphoneos"; - SYSTEM_HEADER_SEARCH_PATHS = ""; - }; - name = Debug; - }; - 706EF1E026D166C5001C950E /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ARCHS = arm64; - CLANG_WARN_DOCUMENTATION_COMMENTS = NO; - CLANG_X86_VECTOR_INSTRUCTIONS = default; - CODE_SIGN_STYLE = Automatic; - DEAD_CODE_STRIPPING = NO; - EXECUTABLE_PREFIX = lib; - GCC_PRECOMPILE_PREFIX_HEADER = NO; - GCC_PREFIX_HEADER = "$(PROJECT_DIR)/../libkram/kram/KramConfig.h"; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - PRODUCT_NAME = "$(TARGET_NAME)"; - SDKROOT = iphoneos; - SKIP_INSTALL = YES; - SUPPORTED_PLATFORMS = "iphonesimulator iphoneos"; - SYSTEM_HEADER_SEARCH_PATHS = ""; + TARGETED_DEVICE_FAMILY = "1,2,7"; }; name = Release; }; @@ -1867,15 +1674,6 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 706EF1DE26D166C5001C950E /* Build configuration list for PBXNativeTarget "kram-ios" */ = { - isa = 
XCConfigurationList; - buildConfigurations = ( - 706EF1DF26D166C5001C950E /* Debug */, - 706EF1E026D166C5001C950E /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; /* End XCConfigurationList section */ }; rootObject = 706ECDD626D1577A001C950E /* Project object */; diff --git a/build2/kram.xcworkspace/contents.xcworkspacedata b/build2/kram.xcworkspace/contents.xcworkspacedata index 6c500c6d..c4f3782a 100644 --- a/build2/kram.xcworkspace/contents.xcworkspacedata +++ b/build2/kram.xcworkspace/contents.xcworkspacedata @@ -16,4 +16,7 @@ + + diff --git a/build2/kramc.xcodeproj/project.pbxproj b/build2/kramc.xcodeproj/project.pbxproj index 0c1c5c27..e422cd4d 100644 --- a/build2/kramc.xcodeproj/project.pbxproj +++ b/build2/kramc.xcodeproj/project.pbxproj @@ -3,10 +3,12 @@ archiveVersion = 1; classes = { }; - objectVersion = 50; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ + 701AF1922CAE4F2300BD0886 /* libvectormath.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 70B687272CAD1996007ACA58 /* libvectormath.a */; }; + 705F68F82BA2DD2000437FAA /* libcompression.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 705F68F72BA2DD1100437FAA /* libcompression.tbd */; }; 706EF28326D18251001C950E /* libkram.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF28226D18251001C950E /* libkram.a */; }; 706EF28526D1825D001C950E /* libate.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF28426D18257001C950E /* libate.tbd */; }; 706EF28726D18290001C950E /* KramMain.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EF28026D18223001C950E /* KramMain.cpp */; }; @@ -26,11 +28,14 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 705F68F72BA2DD1100437FAA /* libcompression.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libcompression.tbd; path = usr/lib/libcompression.tbd; sourceTree = SDKROOT; }; 
706EF27226D18082001C950E /* kram */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = kram; sourceTree = BUILT_PRODUCTS_DIR; }; 706EF28026D18223001C950E /* KramMain.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = KramMain.cpp; sourceTree = ""; }; 706EF28226D18251001C950E /* libkram.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libkram.a; sourceTree = BUILT_PRODUCTS_DIR; }; 706EF28426D18257001C950E /* libate.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libate.tbd; path = usr/lib/libate.tbd; sourceTree = SDKROOT; }; 706EF28A26D182CB001C950E /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; + 707D4C542CC420FE00729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; + 70B687272CAD1996007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -38,9 +43,11 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + 701AF1922CAE4F2300BD0886 /* libvectormath.a in Frameworks */, 706EF28326D18251001C950E /* libkram.a in Frameworks */, 706EF28B26D182CB001C950E /* Foundation.framework in Frameworks */, 706EF28526D1825D001C950E /* libate.tbd in Frameworks */, + 705F68F82BA2DD2000437FAA /* libcompression.tbd in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -53,6 +60,7 @@ 706EF27E26D18223001C950E /* kramc */, 706EF27326D18082001C950E /* Products */, 706EF28126D18251001C950E /* Frameworks */, + 707D4C542CC420FE00729BE0 /* kram.xcconfig */, ); sourceTree = ""; }; @@ -76,6 +84,8 @@ 
706EF28126D18251001C950E /* Frameworks */ = { isa = PBXGroup; children = ( + 70B687272CAD1996007ACA58 /* libvectormath.a */, + 705F68F72BA2DD1100437FAA /* libcompression.tbd */, 706EF28A26D182CB001C950E /* Foundation.framework */, 706EF28426D18257001C950E /* libate.tbd */, 706EF28226D18251001C950E /* libkram.a */, @@ -109,7 +119,8 @@ 706EF26A26D18082001C950E /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 1240; + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1500; TargetAttributes = { 706EF27126D18082001C950E = { CreatedOnToolsVersion = 12.4; @@ -148,8 +159,10 @@ /* Begin XCBuildConfiguration section */ 706EF27726D18082001C950E /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C542CC420FE00729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -166,6 +179,7 @@ CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -180,35 +194,43 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 
GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; ONLY_ACTIVE_ARCH = YES; + OTHER_CFLAGS = ( + "$(inherited)", + "-include", + KramConfig.h, + ); + PRESERVE_DEAD_CODE_INITS_AND_TERMS = NO; SDKROOT = macosx; }; name = Debug; }; 706EF27826D18082001C950E /* Release */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C542CC420FE00729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -225,6 +247,7 @@ CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -239,20 +262,30 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = NO; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; + OTHER_CFLAGS = ( + "$(inherited)", + "-include", + KramConfig.h, + ); + PRESERVE_DEAD_CODE_INITS_AND_TERMS = NO; SDKROOT = macosx; }; 
name = Release; @@ -267,13 +300,19 @@ CLANG_WARN_OBJC_INTERFACE_IVARS = YES_ERROR; CLANG_WARN_OBJC_MISSING_PROPERTY_SYNTHESIS = YES; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; GCC_WARN_SHADOW = YES; GCC_WARN_STRICT_SELECTOR_MATCH = YES; - HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; - MACOSX_DEPLOYMENT_TARGET = 10.15; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.kramc; PRODUCT_NAME = kram; }; name = Debug; @@ -288,14 +327,20 @@ CLANG_WARN_OBJC_INTERFACE_IVARS = YES_ERROR; CLANG_WARN_OBJC_MISSING_PROPERTY_SYNTHESIS = YES; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; DEAD_CODE_STRIPPING = YES; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; GCC_WARN_SHADOW = YES; GCC_WARN_STRICT_SELECTOR_MATCH = YES; - HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; - MACOSX_DEPLOYMENT_TARGET = 10.15; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.kramc; PRODUCT_NAME = kram; }; name = Release; diff --git a/build2/kramv.xcodeproj/project.pbxproj b/build2/kramv.xcodeproj/project.pbxproj index 060736bb..5f9c1043 100644 --- a/build2/kramv.xcodeproj/project.pbxproj +++ b/build2/kramv.xcodeproj/project.pbxproj @@ -3,10 +3,13 @@ archiveVersion = 1; classes = { }; - objectVersion = 50; + objectVersion = 54; objects = { /* Begin 
PBXBuildFile section */ + 705F68FA2BA2DD4800437FAA /* libcompression.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 705F68F92BA2DD3E00437FAA /* libcompression.tbd */; }; + 705F68FB2BA2DD5900437FAA /* libcompression.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 705F68F92BA2DD3E00437FAA /* libcompression.tbd */; }; + 705F68FC2BA2DD6200437FAA /* libcompression.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 705F68F92BA2DD3E00437FAA /* libcompression.tbd */; }; 706EF23926D17A81001C950E /* KramViewerMain.mm in Sources */ = {isa = PBXBuildFile; fileRef = 706EF22C26D17A81001C950E /* KramViewerMain.mm */; }; 706EF23B26D17A81001C950E /* KramViewerBase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 706EF22F26D17A81001C950E /* KramViewerBase.cpp */; }; 706EF23C26D17A81001C950E /* KramLoader.mm in Sources */ = {isa = PBXBuildFile; fileRef = 706EF23026D17A81001C950E /* KramLoader.mm */; }; @@ -32,10 +35,15 @@ 708D44D0272B03BF00783DCE /* pbr.txt in Resources */ = {isa = PBXBuildFile; fileRef = 708D44CF272B03BF00783DCE /* pbr.txt */; }; 708D44D4272FA4C800783DCE /* tropical_beach.ktx in Resources */ = {isa = PBXBuildFile; fileRef = 708D44D2272FA4C800783DCE /* tropical_beach.ktx */; }; 708D44D5272FA4C800783DCE /* piazza_san_marco.ktx in Resources */ = {isa = PBXBuildFile; fileRef = 708D44D3272FA4C800783DCE /* piazza_san_marco.ktx */; }; + 7099CFBD28E8319C008D4ABF /* UniformTypeIdentifiers.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7099CFBC28E8319B008D4ABF /* UniformTypeIdentifiers.framework */; }; + 70B5BFF828F5254000CD83D8 /* CoreText.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70B5BFF728F5253F00CD83D8 /* CoreText.framework */; }; + 70B687222CAD1962007ACA58 /* libvectormath.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 70B687212CAD1962007ACA58 /* libvectormath.a */; }; + 70B687242CAD1976007ACA58 /* libvectormath.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 70B687232CAD1976007ACA58 /* libvectormath.a */; }; + 
70B687262CAD197E007ACA58 /* libvectormath.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 70B687252CAD197E007ACA58 /* libvectormath.a */; }; 70E33EC826E536BF00CBA422 /* QuickLookThumbnailing.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70E33EC726E536BF00CBA422 /* QuickLookThumbnailing.framework */; }; 70E33ECA26E536BF00CBA422 /* Quartz.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70E33EC926E536BF00CBA422 /* Quartz.framework */; }; 70E33ECE26E536BF00CBA422 /* KramThumbnailProvider.mm in Sources */ = {isa = PBXBuildFile; fileRef = 70E33ECD26E536BF00CBA422 /* KramThumbnailProvider.mm */; }; - 70E33ED326E536BF00CBA422 /* kram-thumb.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 70E33EC626E536BF00CBA422 /* kram-thumb.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; + 70E33ED326E536BF00CBA422 /* kram-thumb.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = 70E33EC626E536BF00CBA422 /* kram-thumb.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 70E33ED826E5377000CBA422 /* libkram.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF24826D17BC2001C950E /* libkram.a */; }; 70E33ED926E5378800CBA422 /* libate.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF25926D17CAA001C950E /* libate.tbd */; }; 70E33EDB26E5379900CBA422 /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70E33EDA26E5379900CBA422 /* CoreGraphics.framework */; }; @@ -43,7 +51,7 @@ 70E33EE326E5478900CBA422 /* Quartz.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70E33EC926E536BF00CBA422 /* Quartz.framework */; }; 70E33EE726E5478900CBA422 /* KramPreviewViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 70E33EE626E5478900CBA422 /* KramPreviewViewController.mm */; }; 70E33EEA26E5478900CBA422 /* KramPreviewViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 70E33EE826E5478900CBA422 /* KramPreviewViewController.xib */; }; - 
70E33EEF26E5478900CBA422 /* kram-preview.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 70E33EE226E5478900CBA422 /* kram-preview.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; + 70E33EEF26E5478900CBA422 /* kram-preview.appex in Embed Foundation Extensions */ = {isa = PBXBuildFile; fileRef = 70E33EE226E5478900CBA422 /* kram-preview.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 70E33EF326E548C700CBA422 /* libkram.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF24826D17BC2001C950E /* libkram.a */; }; 70E33EF426E548CF00CBA422 /* libate.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 706EF25926D17CAA001C950E /* libate.tbd */; }; 70E33EF526E548D800CBA422 /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 70E33EDA26E5379900CBA422 /* CoreGraphics.framework */; }; @@ -81,21 +89,22 @@ name = "Embed Frameworks"; runOnlyForDeploymentPostprocessing = 0; }; - 70E33ED426E536BF00CBA422 /* Embed App Extensions */ = { + 70E33ED426E536BF00CBA422 /* Embed Foundation Extensions */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; dstPath = ""; dstSubfolderSpec = 13; files = ( - 70E33EEF26E5478900CBA422 /* kram-preview.appex in Embed App Extensions */, - 70E33ED326E536BF00CBA422 /* kram-thumb.appex in Embed App Extensions */, + 70E33EEF26E5478900CBA422 /* kram-preview.appex in Embed Foundation Extensions */, + 70E33ED326E536BF00CBA422 /* kram-thumb.appex in Embed Foundation Extensions */, ); - name = "Embed App Extensions"; + name = "Embed Foundation Extensions"; runOnlyForDeploymentPostprocessing = 0; }; /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 705F68F92BA2DD3E00437FAA /* libcompression.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libcompression.tbd; path = usr/lib/libcompression.tbd; sourceTree = SDKROOT; }; 706EF20F26D17A26001C950E /* kramv.app */ = {isa = PBXFileReference; 
explicitFileType = wrapper.application; includeInIndex = 0; path = kramv.app; sourceTree = BUILT_PRODUCTS_DIR; }; 706EF22A26D17A81001C950E /* KramViewerBase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramViewerBase.h; sourceTree = ""; }; 706EF22B26D17A81001C950E /* kramv.entitlements */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.entitlements; path = kramv.entitlements; sourceTree = ""; }; @@ -117,6 +126,7 @@ 706EF25426D17C85001C950E /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; }; 706EF25626D17C9D001C950E /* AppKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AppKit.framework; path = System/Library/Frameworks/AppKit.framework; sourceTree = SDKROOT; }; 706EF25926D17CAA001C950E /* libate.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = libate.tbd; path = usr/lib/libate.tbd; sourceTree = SDKROOT; }; + 707D4C532CC420E700729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; 7083365D2715642C0077BCB6 /* GLTF.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = GLTF.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 70833661271564320077BCB6 /* GLTFMTL.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = GLTFMTL.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 70833664271575E50077BCB6 /* GLTF.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = GLTF.framework; sourceTree = BUILT_PRODUCTS_DIR; }; @@ -128,6 +138,11 @@ 708D44CF272B03BF00783DCE /* pbr.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = pbr.txt; sourceTree = ""; }; 708D44D2272FA4C800783DCE /* 
tropical_beach.ktx */ = {isa = PBXFileReference; lastKnownFileType = file; path = tropical_beach.ktx; sourceTree = ""; }; 708D44D3272FA4C800783DCE /* piazza_san_marco.ktx */ = {isa = PBXFileReference; lastKnownFileType = file; path = piazza_san_marco.ktx; sourceTree = ""; }; + 7099CFBC28E8319B008D4ABF /* UniformTypeIdentifiers.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UniformTypeIdentifiers.framework; path = System/Library/Frameworks/UniformTypeIdentifiers.framework; sourceTree = SDKROOT; }; + 70B5BFF728F5253F00CD83D8 /* CoreText.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreText.framework; path = System/Library/Frameworks/CoreText.framework; sourceTree = SDKROOT; }; + 70B687212CAD1962007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 70B687232CAD1976007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 70B687252CAD197E007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; }; 70E33EC626E536BF00CBA422 /* kram-thumb.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = "kram-thumb.appex"; sourceTree = BUILT_PRODUCTS_DIR; }; 70E33EC726E536BF00CBA422 /* QuickLookThumbnailing.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = QuickLookThumbnailing.framework; path = System/Library/Frameworks/QuickLookThumbnailing.framework; sourceTree = SDKROOT; }; 70E33EC926E536BF00CBA422 /* Quartz.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Quartz.framework; path = System/Library/Frameworks/Quartz.framework; sourceTree = SDKROOT; }; @@ -150,15 +165,19 @@ isa = PBXFrameworksBuildPhase; 
buildActionMask = 2147483647; files = ( + 70B687222CAD1962007ACA58 /* libvectormath.a in Frameworks */, + 70871D4327CAD3EA00D0B9E1 /* libkram.a in Frameworks */, + 706EF26726D17DFA001C950E /* libate.tbd in Frameworks */, 706EF24D26D17C30001C950E /* ModelIO.framework in Frameworks */, 706EF25226D17C6F001C950E /* MetalKit.framework in Frameworks */, - 70871D4327CAD3EA00D0B9E1 /* libkram.a in Frameworks */, + 70B5BFF828F5254000CD83D8 /* CoreText.framework in Frameworks */, 706EF25526D17C85001C950E /* Metal.framework in Frameworks */, + 7099CFBD28E8319C008D4ABF /* UniformTypeIdentifiers.framework in Frameworks */, 706EF25726D17C9D001C950E /* AppKit.framework in Frameworks */, 70833669271575EA0077BCB6 /* GLTFMTL.framework in Frameworks */, - 706EF26726D17DFA001C950E /* libate.tbd in Frameworks */, 706EF24F26D17C43001C950E /* Foundation.framework in Frameworks */, 70833665271575E50077BCB6 /* GLTF.framework in Frameworks */, + 705F68FA2BA2DD4800437FAA /* libcompression.tbd in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -166,9 +185,11 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + 70B687242CAD1976007ACA58 /* libvectormath.a in Frameworks */, + 70E33ED826E5377000CBA422 /* libkram.a in Frameworks */, 70E33EC826E536BF00CBA422 /* QuickLookThumbnailing.framework in Frameworks */, + 705F68FB2BA2DD5900437FAA /* libcompression.tbd in Frameworks */, 70E33ECA26E536BF00CBA422 /* Quartz.framework in Frameworks */, - 70E33ED826E5377000CBA422 /* libkram.a in Frameworks */, 70E33EDD26E537AD00CBA422 /* Accelerate.framework in Frameworks */, 70E33EDB26E5379900CBA422 /* CoreGraphics.framework in Frameworks */, 70E33ED926E5378800CBA422 /* libate.tbd in Frameworks */, @@ -179,11 +200,13 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + 70B687262CAD197E007ACA58 /* libvectormath.a in Frameworks */, + 70E33EF326E548C700CBA422 /* libkram.a in Frameworks */, 70E33EF526E548D800CBA422 /* CoreGraphics.framework in Frameworks 
*/, + 705F68FC2BA2DD6200437FAA /* libcompression.tbd in Frameworks */, 70E33EF426E548CF00CBA422 /* libate.tbd in Frameworks */, 70E33EF626E548E200CBA422 /* Accelerate.framework in Frameworks */, 70E33EF726E553B900CBA422 /* AppKit.framework in Frameworks */, - 70E33EF326E548C700CBA422 /* libkram.a in Frameworks */, 70E33EE326E5478900CBA422 /* Quartz.framework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -201,6 +224,7 @@ 70E33EE426E5478900CBA422 /* kram-preview */, 706EF21026D17A26001C950E /* Products */, 706EF24726D17BC2001C950E /* Frameworks */, + 707D4C532CC420E700729BE0 /* kram.xcconfig */, ); sourceTree = ""; }; @@ -236,6 +260,12 @@ 706EF24726D17BC2001C950E /* Frameworks */ = { isa = PBXGroup; children = ( + 70B687252CAD197E007ACA58 /* libvectormath.a */, + 70B687232CAD1976007ACA58 /* libvectormath.a */, + 70B687212CAD1962007ACA58 /* libvectormath.a */, + 705F68F92BA2DD3E00437FAA /* libcompression.tbd */, + 70B5BFF728F5253F00CD83D8 /* CoreText.framework */, + 7099CFBC28E8319B008D4ABF /* UniformTypeIdentifiers.framework */, 70833668271575EA0077BCB6 /* GLTFMTL.framework */, 70833664271575E50077BCB6 /* GLTF.framework */, 70833661271564320077BCB6 /* GLTFMTL.framework */, @@ -315,7 +345,7 @@ 706EF20B26D17A26001C950E /* Sources */, 706EF20C26D17A26001C950E /* Frameworks */, 706EF20D26D17A26001C950E /* Resources */, - 70E33ED426E536BF00CBA422 /* Embed App Extensions */, + 70E33ED426E536BF00CBA422 /* Embed Foundation Extensions */, 70833667271575E50077BCB6 /* Embed Frameworks */, ); buildRules = ( @@ -369,7 +399,8 @@ 706EF20726D17A26001C950E /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 1240; + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1540; TargetAttributes = { 706EF20E26D17A26001C950E = { CreatedOnToolsVersion = 12.4; @@ -502,8 +533,11 @@ /* Begin XCBuildConfiguration section */ 706EF22126D17A2E001C950E /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 
707D4C532CC420E700729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -513,13 +547,14 @@ CLANG_ENABLE_OBJC_WEAK = YES; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_COMMA = YES; + CLANG_WARN_COMMA = NO; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -534,44 +569,54 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; DONT_GENERATE_INFOPLIST_FILE = YES; + ENABLE_APP_SANDBOX = YES; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu11; GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; - MACOSX_DEPLOYMENT_TARGET = 11.0; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram", + "$(PROJECT_DIR)/../libkram/eastl/include", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + 
IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; MTL_FAST_MATH = YES; MTL_LANGUAGE_REVISION = UseDeploymentTarget; ONLY_ACTIVE_ARCH = YES; OTHER_CFLAGS = ( - "-DCOMPILE_EASTL=0", + "$(inherited)", "-include", KramConfig.h, ); - OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + PRESERVE_DEAD_CODE_INITS_AND_TERMS = NO; SDKROOT = macosx; }; name = Debug; }; 706EF22226D17A2E001C950E /* Release */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C532CC420E700729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; @@ -581,13 +626,14 @@ CLANG_ENABLE_OBJC_WEAK = YES; CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_COMMA = YES; + CLANG_WARN_COMMA = NO; CLANG_WARN_CONSTANT_CONVERSION = YES; CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_IMPLICIT_FALLTHROUGH = YES_ERROR; CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; @@ -602,11 +648,17 @@ CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DONT_GENERATE_INFOPLIST_FILE = YES; + ENABLE_APP_SANDBOX = YES; + ENABLE_HARDENED_RUNTIME = YES; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = 
YES_ERROR; @@ -614,17 +666,23 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = "$(PROJECT_DIR)/../libkram/kram"; - MACOSX_DEPLOYMENT_TARGET = 11.0; + HEADER_SEARCH_PATHS = ( + "$(PROJECT_DIR)/../libkram/kram", + "$(PROJECT_DIR)/../libkram", + "$(PROJECT_DIR)/../libkram/eastl/include", + "$(PROJECT_DIR)/../libkram/vectormath", + ); + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; MTL_FAST_MATH = YES; MTL_LANGUAGE_REVISION = UseDeploymentTarget; OTHER_CFLAGS = ( - "-DCOMPILE_EASTL=0", + "$(inherited)", "-include", KramConfig.h, ); - OTHER_CPLUSPLUSFLAGS = "$(OTHER_CFLAGS)"; + PRESERVE_DEAD_CODE_INITS_AND_TERMS = NO; SDKROOT = macosx; }; name = Release; @@ -642,22 +700,29 @@ CLANG_WARN_OBJC_MISSING_PROPERTY_SYNTHESIS = NO; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; CODE_SIGN_ENTITLEMENTS = "$(PROJECT_DIR)/../kramv/kramv.entitlements"; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Developer ID Application"; + CODE_SIGN_STYLE = Manual; COMBINE_HIDPI_IMAGES = YES; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; GCC_WARN_64_TO_32_BIT_CONVERSION = NO; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; GCC_WARN_SHADOW = YES; GCC_WARN_STRICT_SELECTOR_MATCH = YES; INFOPLIST_FILE = ../kramv/Info.plist; + INFOPLIST_KEY_CFBundleDisplayName = kramv; + INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - PRODUCT_BUNDLE_IDENTIFIER = com.ba.kramv; + MARKETING_VERSION = 2.0.0; + MTL_LANGUAGE_REVISION = Metal30; + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.kramv; PRODUCT_NAME = "$(TARGET_NAME)"; + 
"PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerProvision; }; name = Debug; }; @@ -674,23 +739,30 @@ CLANG_WARN_OBJC_MISSING_PROPERTY_SYNTHESIS = NO; CLANG_WARN_OBJC_REPEATED_USE_OF_WEAK = YES; CODE_SIGN_ENTITLEMENTS = "$(PROJECT_DIR)/../kramv/kramv.entitlements"; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Developer ID Application"; + CODE_SIGN_STYLE = Manual; COMBINE_HIDPI_IMAGES = YES; DEAD_CODE_STRIPPING = YES; - DEVELOPMENT_TEAM = ""; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; GCC_WARN_64_TO_32_BIT_CONVERSION = NO; GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; GCC_WARN_SHADOW = YES; GCC_WARN_STRICT_SELECTOR_MATCH = YES; INFOPLIST_FILE = ../kramv/Info.plist; + INFOPLIST_KEY_CFBundleDisplayName = kramv; + INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - PRODUCT_BUNDLE_IDENTIFIER = com.ba.kramv; + MARKETING_VERSION = 2.0.0; + MTL_LANGUAGE_REVISION = Metal30; + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.kramv; PRODUCT_NAME = "$(TARGET_NAME)"; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerProvision; }; name = Release; }; @@ -698,17 +770,19 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_ENTITLEMENTS = "$(SRCROOT)/../kram-thumb/kram_thumb.entitlements"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; INFOPLIST_FILE = "../kram-thumb/Info.plist"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", "@executable_path/../../../../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - 
PRODUCT_BUNDLE_IDENTIFIER = "com.ba.kramv.kram-thumb"; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kramv.kram-thumb"; PRODUCT_NAME = "$(TARGET_NAME)"; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerThumbProvision; SKIP_INSTALL = YES; }; name = Debug; @@ -717,17 +791,19 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_ENTITLEMENTS = "$(SRCROOT)/../kram-thumb/kram_thumb.entitlements"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; INFOPLIST_FILE = "../kram-thumb/Info.plist"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", "@executable_path/../../../../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - PRODUCT_BUNDLE_IDENTIFIER = "com.ba.kramv.kram-thumb"; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kramv.kram-thumb"; PRODUCT_NAME = "$(TARGET_NAME)"; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerThumbProvision; SKIP_INSTALL = YES; }; name = Release; @@ -736,17 +812,19 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_ENTITLEMENTS = "$(SRCROOT)/../kram-preview/kram_preview.entitlements"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; INFOPLIST_FILE = "../kram-preview/Info.plist"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", "@executable_path/../../../../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - PRODUCT_BUNDLE_IDENTIFIER = "com.ba.kramv.kram-preview"; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kramv.kram-preview"; PRODUCT_NAME = "$(TARGET_NAME)"; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerPreviewProvision; SKIP_INSTALL = YES; }; name = Debug; @@ -755,17 
+833,19 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_ENTITLEMENTS = "$(SRCROOT)/../kram-preview/kram_preview.entitlements"; - CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; INFOPLIST_FILE = "../kram-preview/Info.plist"; LD_RUNPATH_SEARCH_PATHS = ( "$(inherited)", "@executable_path/../Frameworks", "@executable_path/../../../../Frameworks", ); - MACOSX_DEPLOYMENT_TARGET = 10.15; - PRODUCT_BUNDLE_IDENTIFIER = "com.ba.kramv.kram-preview"; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kramv.kram-preview"; PRODUCT_NAME = "$(TARGET_NAME)"; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramViewerPreviewProvision; SKIP_INSTALL = YES; }; name = Release; diff --git a/build2/vectormath.xcodeproj/project.pbxproj b/build2/vectormath.xcodeproj/project.pbxproj new file mode 100644 index 00000000..3b4487e0 --- /dev/null +++ b/build2/vectormath.xcodeproj/project.pbxproj @@ -0,0 +1,378 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 77; + objects = { + +/* Begin PBXBuildFile section */ + 70570FE52CB378EE005692BB /* bounds234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70570FE42CB378E7005692BB /* bounds234.h */; }; + 70570FE92CB379C9005692BB /* bounds234.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70570FE72CB37997005692BB /* bounds234.cpp */; }; + 70B6870B2CAD1072007ACA58 /* float234.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70B687002CAD1072007ACA58 /* float234.cpp */; }; + 70B6870C2CAD1072007ACA58 /* double234.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70B686FC2CAD1072007ACA58 /* double234.cpp */; }; + 70B6870D2CAD1072007ACA58 /* vectormath234.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70B687092CAD1072007ACA58 /* vectormath234.cpp */; }; + 70B6870E2CAD1072007ACA58 /* float4a.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70B686FE2CAD1072007ACA58 /* float4a.cpp */; }; + 70B6870F2CAD1072007ACA58 /* vectormath234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687082CAD1072007ACA58 /* vectormath234.h */; }; + 70B687102CAD1072007ACA58 /* long234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687032CAD1072007ACA58 /* long234.h */; }; + 70B687112CAD1072007ACA58 /* sse2neon.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687062CAD1072007ACA58 /* sse2neon.h */; }; + 70B687122CAD1072007ACA58 /* sse_mathfun.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687052CAD1072007ACA58 /* sse_mathfun.h */; }; + 70B687132CAD1072007ACA58 /* float234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B686FF2CAD1072007ACA58 /* float234.h */; }; + 70B687142CAD1072007ACA58 /* half234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687012CAD1072007ACA58 /* half234.h */; }; + 70B687152CAD1072007ACA58 /* int234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687022CAD1072007ACA58 /* int234.h */; }; + 70B687162CAD1072007ACA58 /* float4a.h in Headers */ = {isa = PBXBuildFile; fileRef = 
70B686FD2CAD1072007ACA58 /* float4a.h */; }; + 70B687172CAD1072007ACA58 /* sse2neon-arm64.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B687072CAD1072007ACA58 /* sse2neon-arm64.h */; }; + 70B687182CAD1072007ACA58 /* double234.h in Headers */ = {isa = PBXBuildFile; fileRef = 70B686FB2CAD1072007ACA58 /* double234.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 70570FE42CB378E7005692BB /* bounds234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bounds234.h; sourceTree = ""; }; + 70570FE72CB37997005692BB /* bounds234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bounds234.cpp; sourceTree = ""; }; + 70570FEF2CB8C5C6005692BB /* module.modulemap */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.module-map"; path = module.modulemap; sourceTree = ""; }; + 707D4C522CC41F3900729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; + 7087895C2CC6C17700E34A6B /* simdk.py */ = {isa = PBXFileReference; lastKnownFileType = text.script.python; name = simdk.py; path = ../scripts/simdk.py; sourceTree = SOURCE_ROOT; }; + 70B686F42CAD1026007ACA58 /* libvectormath.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libvectormath.a; sourceTree = BUILT_PRODUCTS_DIR; }; + 70B686FB2CAD1072007ACA58 /* double234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = double234.h; sourceTree = ""; }; + 70B686FC2CAD1072007ACA58 /* double234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = double234.cpp; sourceTree = ""; }; + 70B686FD2CAD1072007ACA58 /* float4a.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = float4a.h; sourceTree = ""; }; + 70B686FE2CAD1072007ACA58 /* float4a.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = float4a.cpp; sourceTree = ""; }; + 
70B686FF2CAD1072007ACA58 /* float234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = float234.h; sourceTree = ""; }; + 70B687002CAD1072007ACA58 /* float234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = float234.cpp; sourceTree = ""; }; + 70B687012CAD1072007ACA58 /* half234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = half234.h; sourceTree = ""; }; + 70B687022CAD1072007ACA58 /* int234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = int234.h; sourceTree = ""; }; + 70B687032CAD1072007ACA58 /* long234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = long234.h; sourceTree = ""; }; + 70B687042CAD1072007ACA58 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 70B687052CAD1072007ACA58 /* sse_mathfun.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sse_mathfun.h; sourceTree = ""; }; + 70B687062CAD1072007ACA58 /* sse2neon.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sse2neon.h; sourceTree = ""; }; + 70B687072CAD1072007ACA58 /* sse2neon-arm64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "sse2neon-arm64.h"; sourceTree = ""; }; + 70B687082CAD1072007ACA58 /* vectormath234.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = vectormath234.h; sourceTree = ""; }; + 70B687092CAD1072007ACA58 /* vectormath234.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = vectormath234.cpp; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 70B686F22CAD1026007ACA58 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 
70B686EB2CAD1026007ACA58 = { + isa = PBXGroup; + children = ( + 70B6870A2CAD1072007ACA58 /* vectormath */, + 70B686F52CAD1026007ACA58 /* Products */, + 707D4C522CC41F3900729BE0 /* kram.xcconfig */, + ); + sourceTree = ""; + }; + 70B686F52CAD1026007ACA58 /* Products */ = { + isa = PBXGroup; + children = ( + 70B686F42CAD1026007ACA58 /* libvectormath.a */, + ); + name = Products; + sourceTree = ""; + }; + 70B6870A2CAD1072007ACA58 /* vectormath */ = { + isa = PBXGroup; + children = ( + 7087895C2CC6C17700E34A6B /* simdk.py */, + 70570FEF2CB8C5C6005692BB /* module.modulemap */, + 70B687042CAD1072007ACA58 /* README.md */, + 70B686FB2CAD1072007ACA58 /* double234.h */, + 70B686FC2CAD1072007ACA58 /* double234.cpp */, + 70B686FD2CAD1072007ACA58 /* float4a.h */, + 70B686FE2CAD1072007ACA58 /* float4a.cpp */, + 70B686FF2CAD1072007ACA58 /* float234.h */, + 70B687002CAD1072007ACA58 /* float234.cpp */, + 70570FE42CB378E7005692BB /* bounds234.h */, + 70570FE72CB37997005692BB /* bounds234.cpp */, + 70B687012CAD1072007ACA58 /* half234.h */, + 70B687022CAD1072007ACA58 /* int234.h */, + 70B687032CAD1072007ACA58 /* long234.h */, + 70B687052CAD1072007ACA58 /* sse_mathfun.h */, + 70B687062CAD1072007ACA58 /* sse2neon.h */, + 70B687072CAD1072007ACA58 /* sse2neon-arm64.h */, + 70B687082CAD1072007ACA58 /* vectormath234.h */, + 70B687092CAD1072007ACA58 /* vectormath234.cpp */, + ); + name = vectormath; + path = ../libkram/vectormath; + sourceTree = SOURCE_ROOT; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 70B686F02CAD1026007ACA58 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 70B6870F2CAD1072007ACA58 /* vectormath234.h in Headers */, + 70570FE52CB378EE005692BB /* bounds234.h in Headers */, + 70B687102CAD1072007ACA58 /* long234.h in Headers */, + 70B687112CAD1072007ACA58 /* sse2neon.h in Headers */, + 70B687122CAD1072007ACA58 /* sse_mathfun.h in Headers */, + 70B687132CAD1072007ACA58 /* float234.h in Headers */, + 
70B687142CAD1072007ACA58 /* half234.h in Headers */, + 70B687152CAD1072007ACA58 /* int234.h in Headers */, + 70B687162CAD1072007ACA58 /* float4a.h in Headers */, + 70B687172CAD1072007ACA58 /* sse2neon-arm64.h in Headers */, + 70B687182CAD1072007ACA58 /* double234.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 70B686F32CAD1026007ACA58 /* vectormath */ = { + isa = PBXNativeTarget; + buildConfigurationList = 70B686F82CAD1026007ACA58 /* Build configuration list for PBXNativeTarget "vectormath" */; + buildPhases = ( + 70B686F02CAD1026007ACA58 /* Headers */, + 70B686F12CAD1026007ACA58 /* Sources */, + 70B686F22CAD1026007ACA58 /* Frameworks */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = vectormath; + packageProductDependencies = ( + ); + productName = vectormath; + productReference = 70B686F42CAD1026007ACA58 /* libvectormath.a */; + productType = "com.apple.product-type.library.static"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 70B686EC2CAD1026007ACA58 /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastUpgradeCheck = 1600; + TargetAttributes = { + 70B686F32CAD1026007ACA58 = { + CreatedOnToolsVersion = 16.0; + }; + }; + }; + buildConfigurationList = 70B686EF2CAD1026007ACA58 /* Build configuration list for PBXProject "vectormath" */; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 70B686EB2CAD1026007ACA58; + minimizedProjectReferenceProxies = 1; + preferredProjectObjectVersion = 77; + productRefGroup = 70B686F52CAD1026007ACA58 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 70B686F32CAD1026007ACA58 /* vectormath */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 70B686F12CAD1026007ACA58 /* Sources */ = { + isa = PBXSourcesBuildPhase; + 
buildActionMask = 2147483647; + files = ( + 70B6870B2CAD1072007ACA58 /* float234.cpp in Sources */, + 70B6870C2CAD1072007ACA58 /* double234.cpp in Sources */, + 70B6870D2CAD1072007ACA58 /* vectormath234.cpp in Sources */, + 70570FE92CB379C9005692BB /* bounds234.cpp in Sources */, + 70B6870E2CAD1072007ACA58 /* float4a.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 70B686F62CAD1026007ACA58 /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C522CC41F3900729BE0 /* kram.xcconfig */; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + 
DEFINES_MODULE = YES; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 1; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MODULEMAP_FILE = ../libkram/vectormath/module.modulemap; + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c c++"; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = Automatic; + XROS_DEPLOYMENT_TARGET = 2.0; + }; + name = Debug; + }; + 70B686F72CAD1026007ACA58 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C522CC41F3900729BE0 /* kram.xcconfig */; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = "$(ARCHS_STANDARD)"; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + 
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + DEFINES_MODULE = YES; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MODULEMAP_FILE = ../libkram/vectormath/module.modulemap; + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c c++"; + SDKROOT = Automatic; + XROS_DEPLOYMENT_TARGET = 2.0; + }; + name = Release; + }; + 70B686F92CAD1026007ACA58 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALLOW_TARGET_PLATFORM_SPECIALIZATION = YES; + CODE_SIGN_STYLE = Automatic; + EXECUTABLE_PREFIX = lib; + PRODUCT_NAME = "$(TARGET_NAME)"; + REGISTER_APP_GROUPS = NO; + SKIP_INSTALL = YES; + SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator"; + SUPPORTS_MACCATALYST = NO; + TARGETED_DEVICE_FAMILY = "1,2,7"; + }; + name = Debug; + }; + 70B686FA2CAD1026007ACA58 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALLOW_TARGET_PLATFORM_SPECIALIZATION = YES; + CODE_SIGN_STYLE = Automatic; + EXECUTABLE_PREFIX = lib; + PRODUCT_NAME = "$(TARGET_NAME)"; + REGISTER_APP_GROUPS = NO; + SKIP_INSTALL = YES; + SUPPORTED_PLATFORMS = "iphoneos 
iphonesimulator macosx xros xrsimulator"; + SUPPORTS_MACCATALYST = NO; + TARGETED_DEVICE_FAMILY = "1,2,7"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 70B686EF2CAD1026007ACA58 /* Build configuration list for PBXProject "vectormath" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 70B686F62CAD1026007ACA58 /* Debug */, + 70B686F72CAD1026007ACA58 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 70B686F82CAD1026007ACA58 /* Build configuration list for PBXNativeTarget "vectormath" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 70B686F92CAD1026007ACA58 /* Debug */, + 70B686FA2CAD1026007ACA58 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 70B686EC2CAD1026007ACA58 /* Project object */; +} diff --git a/gtlf/GLTF/GLTF.xcodeproj/project.pbxproj b/gtlf/GLTF/GLTF.xcodeproj/project.pbxproj index 047e484f..9a63773c 100644 --- a/gtlf/GLTF/GLTF.xcodeproj/project.pbxproj +++ b/gtlf/GLTF/GLTF.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 46; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ @@ -56,6 +56,7 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 707D4C5C2CC42C2700729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; 83319297202589FC00B6C7E9 /* GLTFBinaryChunk.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GLTFBinaryChunk.h; sourceTree = ""; }; 8331929B20258A4000B6C7E9 /* GLTFBinaryChunk.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GLTFBinaryChunk.m; sourceTree = ""; }; 8331929E2025911D00B6C7E9 /* GLTFExtensionNames.m */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.c.objc; path = GLTFExtensionNames.m; sourceTree = ""; }; @@ -144,6 +145,7 @@ 83D6FF7E1F48BBFA00F71E0C /* Info.plist */, 83D6FF491F48BB3A00F71E0C /* Products */, 83D600391F48C2FF00F71E0C /* Frameworks */, + 707D4C5C2CC42C2700729BE0 /* kram.xcconfig */, ); sourceTree = ""; }; @@ -282,7 +284,8 @@ 83D6FF3F1F48BB3A00F71E0C /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0930; + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1500; ORGANIZATIONNAME = "Warren Moore"; TargetAttributes = { 83D6FF471F48BB3A00F71E0C = { @@ -293,11 +296,11 @@ }; buildConfigurationList = 83D6FF421F48BB3A00F71E0C /* Build configuration list for PBXProject "GLTF" */; compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; + developmentRegion = en; hasScannedForEncodings = 0; knownRegions = ( - English, en, + Base, ); mainGroup = 83D6FF3E1F48BB3A00F71E0C; productRefGroup = 83D6FF491F48BB3A00F71E0C /* Products */; @@ -352,8 +355,10 @@ /* Begin XCBuildConfiguration section */ 83D6FF4E1F48BB3A00F71E0C /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C5C2CC42C2700729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; @@ -375,35 +380,38 @@ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = NO; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_X86_VECTOR_INSTRUCTIONS = avx2; CODE_SIGN_IDENTITY = "-"; COPY_PHASE_STRIP = NO; CURRENT_PROJECT_VERSION = 1; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = 
YES; ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; + OTHER_CFLAGS = "$(KRAM_FLAGS)"; SDKROOT = macosx; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; @@ -414,6 +422,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; @@ -435,18 +444,24 @@ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = NO; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_X86_VECTOR_INSTRUCTIONS = avx2; CODE_SIGN_IDENTITY = "-"; COPY_PHASE_STRIP = NO; CURRENT_PROJECT_VERSION = 1; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; @@ -454,9 +469,10 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; + OTHER_CFLAGS = "$(KRAM_FLAGS)"; SDKROOT = macosx; VERSIONING_SYSTEM = "apple-generic"; VERSION_INFO_PREFIX = ""; @@ -466,6 +482,7 @@ 83D6FF511F48BB3A00F71E0C /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + ARCHS = arm64; CLANG_ENABLE_OBJC_WEAK = YES; CODE_SIGN_IDENTITY = ""; COMBINE_HIDPI_IMAGES = YES; @@ -477,8 +494,13 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; - IPHONEOS_DEPLOYMENT_TARGET = 10.0; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/Frameworks", + ); + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu99 gnu++11"; PRODUCT_BUNDLE_IDENTIFIER = net.warrenmoore.GLTF; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; @@ -489,7 +511,9 @@ }; 83D6FF521F48BB3A00F71E0C /* Release */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C5C2CC42C2700729BE0 /* kram.xcconfig */; buildSettings = { + ARCHS = arm64; CLANG_ENABLE_OBJC_WEAK = YES; CODE_SIGN_IDENTITY = ""; COMBINE_HIDPI_IMAGES = YES; @@ -501,8 +525,13 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = Info.plist; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; - IPHONEOS_DEPLOYMENT_TARGET = 10.0; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/Frameworks", + ); + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c 
objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu99 gnu++11"; PRODUCT_BUNDLE_IDENTIFIER = net.warrenmoore.GLTF; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; diff --git a/gtlf/GLTF/Headers/Extensions/GLTFKHRLight.h b/gtlf/GLTF/Headers/Extensions/GLTFKHRLight.h index 5044f4e4..2806b2bd 100644 --- a/gtlf/GLTF/Headers/Extensions/GLTFKHRLight.h +++ b/gtlf/GLTF/Headers/Extensions/GLTFKHRLight.h @@ -15,6 +15,7 @@ // #import +#import //@import simd; diff --git a/gtlf/GLTF/Headers/GLTFCamera.h b/gtlf/GLTF/Headers/GLTFCamera.h index 348a44ca..b88e7ace 100644 --- a/gtlf/GLTF/Headers/GLTFCamera.h +++ b/gtlf/GLTF/Headers/GLTFCamera.h @@ -19,6 +19,8 @@ //@import simd; +#import + NS_ASSUME_NONNULL_BEGIN @class GLTFNode; diff --git a/gtlf/GLTF/Headers/GLTFEnums.h b/gtlf/GLTF/Headers/GLTFEnums.h index 25a8d832..cee4a71a 100644 --- a/gtlf/GLTF/Headers/GLTFEnums.h +++ b/gtlf/GLTF/Headers/GLTFEnums.h @@ -16,7 +16,7 @@ #import -#import +//#import //@import Foundation; typedef NS_ENUM(NSInteger, GLTFDataType) { diff --git a/gtlf/GLTF/Headers/GLTFObject.h b/gtlf/GLTF/Headers/GLTFObject.h index 9d5c7654..7a9eef01 100644 --- a/gtlf/GLTF/Headers/GLTFObject.h +++ b/gtlf/GLTF/Headers/GLTFObject.h @@ -15,7 +15,7 @@ // #import -#import +//#import //@import Foundation; NS_ASSUME_NONNULL_BEGIN diff --git a/gtlf/GLTF/Headers/GLTFTexture.h b/gtlf/GLTF/Headers/GLTFTexture.h index 5cf0357c..b3062500 100644 --- a/gtlf/GLTF/Headers/GLTFTexture.h +++ b/gtlf/GLTF/Headers/GLTFTexture.h @@ -16,6 +16,7 @@ #import #import +#import //@import simd; diff --git a/gtlf/GLTF/Headers/GLTFUtilities.h b/gtlf/GLTF/Headers/GLTFUtilities.h index 88d65925..b41ce931 100644 --- a/gtlf/GLTF/Headers/GLTFUtilities.h +++ b/gtlf/GLTF/Headers/GLTFUtilities.h @@ -16,6 +16,8 @@ #import +#import + //@import Foundation; //@import simd; diff --git a/gtlf/GLTF/Source/GLTFAnimation.m b/gtlf/GLTF/Source/GLTFAnimation.m index 24ccd08f..46d153de 100644 --- a/gtlf/GLTF/Source/GLTFAnimation.m +++ 
b/gtlf/GLTF/Source/GLTFAnimation.m @@ -19,6 +19,7 @@ #import "GLTFBufferView.h" #import "GLTFBuffer.h" #import "GLTFNode.h" +#import @implementation GLTFAnimationSampler diff --git a/gtlf/GLTF/Source/GLTFCamera.m b/gtlf/GLTF/Source/GLTFCamera.m index fd2d9c27..4b8f09c1 100644 --- a/gtlf/GLTF/Source/GLTFCamera.m +++ b/gtlf/GLTF/Source/GLTFCamera.m @@ -15,6 +15,7 @@ // #import "GLTFCamera.h" +#import @interface GLTFCamera () @property (nonatomic, assign, getter=projectionMatrixIsDirty) BOOL projectionMatrixDirty; diff --git a/gtlf/GLTF/Source/GLTFNode.m b/gtlf/GLTF/Source/GLTFNode.m index 52dd177f..6459d24a 100644 --- a/gtlf/GLTF/Source/GLTFNode.m +++ b/gtlf/GLTF/Source/GLTFNode.m @@ -18,6 +18,7 @@ #import "GLTFAccessor.h" #import "GLTFMesh.h" #import "GLTFVertexDescriptor.h" +#import @interface GLTFNode () @property (nonatomic, assign, getter=localTransformIsDirty) BOOL localTransformDirty; diff --git a/gtlf/GLTF/Source/GLTFUtilities.m b/gtlf/GLTF/Source/GLTFUtilities.m index a21bad4b..aa00c8c9 100644 --- a/gtlf/GLTF/Source/GLTFUtilities.m +++ b/gtlf/GLTF/Source/GLTFUtilities.m @@ -16,6 +16,8 @@ #import "GLTFUtilities.h" +#import + bool GLTFBoundingBoxIsEmpty(GLTFBoundingBox b) { return (b.minPoint.x == b.maxPoint.x) && (b.minPoint.y == b.maxPoint.y) && (b.minPoint.z == b.maxPoint.z); } diff --git a/gtlf/GLTF/kram.xcconfig b/gtlf/GLTF/kram.xcconfig new file mode 120000 index 00000000..66b528eb --- /dev/null +++ b/gtlf/GLTF/kram.xcconfig @@ -0,0 +1 @@ +../../build2/kram.xcconfig \ No newline at end of file diff --git a/gtlf/GLTFMTL/GLTFMTL.xcodeproj/project.pbxproj b/gtlf/GLTFMTL/GLTFMTL.xcodeproj/project.pbxproj index 2573207b..a5ce1a78 100644 --- a/gtlf/GLTFMTL/GLTFMTL.xcodeproj/project.pbxproj +++ b/gtlf/GLTFMTL/GLTFMTL.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 46; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ @@ -25,6 +25,7 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section 
*/ + 707D4C5B2CC42C1100729BE0 /* kram.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = kram.xcconfig; sourceTree = ""; }; 839945C91F641E9000642E68 /* GLTFMTLLightingEnvironment.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GLTFMTLLightingEnvironment.h; sourceTree = ""; }; 839945CA1F641E9000642E68 /* GLTFMTLLightingEnvironment.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = GLTFMTLLightingEnvironment.m; sourceTree = ""; }; 83AF30CA1FC4DB4D00053BED /* GLTFMTLTextureLoader.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GLTFMTLTextureLoader.h; sourceTree = ""; }; @@ -75,6 +76,7 @@ 83D6FFD91F48BDFB00F71E0C /* Info.plist */, 83D6FFB21F48BCB500F71E0C /* Products */, 83D600341F48C24F00F71E0C /* Frameworks */, + 707D4C5B2CC42C1100729BE0 /* kram.xcconfig */, ); sourceTree = ""; }; @@ -156,7 +158,8 @@ 83D6FFA81F48BCB500F71E0C /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0930; + BuildIndependentTargetsInParallel = YES; + LastUpgradeCheck = 1500; ORGANIZATIONNAME = "Warren Moore"; TargetAttributes = { 83D6FFB01F48BCB500F71E0C = { @@ -167,11 +170,11 @@ }; buildConfigurationList = 83D6FFAB1F48BCB500F71E0C /* Build configuration list for PBXProject "GLTFMTL" */; compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; + developmentRegion = en; hasScannedForEncodings = 0; knownRegions = ( - English, en, + Base, ); mainGroup = 83D6FFA71F48BCB500F71E0C; productRefGroup = 83D6FFB21F48BCB500F71E0C /* Products */; @@ -212,8 +215,11 @@ /* Begin XCBuildConfiguration section */ 83D6FFB71F48BCB500F71E0C /* Debug */ = { isa = XCBuildConfiguration; + baseConfigurationReference = 707D4C5B2CC42C1100729BE0 /* kram.xcconfig */; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 
CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; @@ -235,35 +241,38 @@ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_X86_VECTOR_INSTRUCTIONS = avx2; CODE_SIGN_IDENTITY = "-"; COPY_PHASE_STRIP = NO; CURRENT_PROJECT_VERSION = 1; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = dwarf; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu99; GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - "$(inherited)", - ); GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; GCC_WARN_UNDECLARED_SELECTOR = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; + OTHER_CFLAGS = "$(KRAM_FLAGS)"; SDKROOT = macosx; SUPPORTED_PLATFORMS = "macosx iphoneos"; VALID_ARCHS = "i386 x86_64 armv7s armv7 arm64"; @@ -276,6 +285,8 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; CLANG_ANALYZER_NONNULL = YES; CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; @@ -297,18 +308,24 @@ CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + 
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; CLANG_WARN_STRICT_PROTOTYPES = YES; CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_X86_VECTOR_INSTRUCTIONS = avx2; CODE_SIGN_IDENTITY = "-"; COPY_PHASE_STRIP = NO; CURRENT_PROJECT_VERSION = 1; + DEAD_CODE_STRIPPING = YES; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; GCC_NO_COMMON_BLOCKS = YES; GCC_WARN_64_TO_32_BIT_CONVERSION = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; @@ -316,9 +333,10 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; - IPHONEOS_DEPLOYMENT_TARGET = 14.1; - MACOSX_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 15.0; + MACOSX_DEPLOYMENT_TARGET = 13.0; MTL_ENABLE_DEBUG_INFO = NO; + OTHER_CFLAGS = "$(KRAM_FLAGS)"; SDKROOT = macosx; SUPPORTED_PLATFORMS = "macosx iphoneos"; VALID_ARCHS = "i386 x86_64 armv7s armv7 arm64"; @@ -341,7 +359,13 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = "$(SRCROOT)/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/Frameworks", + ); + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu99 gnu++11"; PRODUCT_BUNDLE_IDENTIFIER = net.warrenmoore.GLTFMTL; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; @@ -364,7 +388,13 @@ FRAMEWORK_VERSION = A; INFOPLIST_FILE = "$(SRCROOT)/Info.plist"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks 
@loader_path/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/Frameworks", + ); + MODULE_VERIFIER_SUPPORTED_LANGUAGES = "objective-c objective-c++"; + MODULE_VERIFIER_SUPPORTED_LANGUAGE_STANDARDS = "gnu99 gnu++11"; PRODUCT_BUNDLE_IDENTIFIER = net.warrenmoore.GLTFMTL; PRODUCT_NAME = "$(TARGET_NAME)"; SKIP_INSTALL = YES; diff --git a/gtlf/GLTFMTL/Headers/GLTFMTLBufferAllocator.h b/gtlf/GLTFMTL/Headers/GLTFMTLBufferAllocator.h index acd40e02..17b7f929 100644 --- a/gtlf/GLTFMTL/Headers/GLTFMTLBufferAllocator.h +++ b/gtlf/GLTFMTL/Headers/GLTFMTLBufferAllocator.h @@ -16,7 +16,7 @@ #import #import -#import +//#import #import //@import Foundation; diff --git a/gtlf/GLTFMTL/Headers/GLTFMTLLightingEnvironment.h b/gtlf/GLTFMTL/Headers/GLTFMTLLightingEnvironment.h index 95a99c44..6aefd850 100644 --- a/gtlf/GLTFMTL/Headers/GLTFMTLLightingEnvironment.h +++ b/gtlf/GLTFMTL/Headers/GLTFMTLLightingEnvironment.h @@ -16,7 +16,7 @@ #import #import -#import +//#import #import //@import Foundation; diff --git a/gtlf/GLTFMTL/Headers/GLTFMTLTextureLoader.h b/gtlf/GLTFMTL/Headers/GLTFMTLTextureLoader.h index afc00fdd..aad79244 100644 --- a/gtlf/GLTFMTL/Headers/GLTFMTLTextureLoader.h +++ b/gtlf/GLTFMTL/Headers/GLTFMTLTextureLoader.h @@ -16,7 +16,7 @@ #import #import -#import +//#import #import //@import Metal; diff --git a/gtlf/GLTFMTL/Headers/GLTFMTLUtilities.h b/gtlf/GLTFMTL/Headers/GLTFMTLUtilities.h index bc0fa5f8..0f183002 100644 --- a/gtlf/GLTFMTL/Headers/GLTFMTLUtilities.h +++ b/gtlf/GLTFMTL/Headers/GLTFMTLUtilities.h @@ -16,7 +16,7 @@ #import #import -#import +//#import #import diff --git a/gtlf/GLTFMTL/Source/GLTFMTLRenderer.m b/gtlf/GLTFMTL/Source/GLTFMTLRenderer.m index 4d388b0b..8293c39b 100644 --- a/gtlf/GLTFMTL/Source/GLTFMTLRenderer.m +++ b/gtlf/GLTFMTL/Source/GLTFMTLRenderer.m @@ -101,8 +101,8 @@ - (instancetype)initWithDevice:(id)device { //_commandQueue = [_device newCommandQueue]; - _viewMatrix = 
matrix_identity_float4x4; - _projectionMatrix = matrix_identity_float4x4; + //_viewMatrix = matrix_identity_float4x4; + //_projectionMatrix = matrix_identity_float4x4; _drawableSize = CGSizeMake(1, 1); _colorPixelFormat = MTLPixelFormatBGRA8Unorm; @@ -201,10 +201,11 @@ - (void)enqueueReusableBuffer:(id)buffer { texture.label = image.name ? image.name : image.url.lastPathComponent; } else if (image.bufferView != nil) { GLTFBufferView *bufferView = image.bufferView; - NSData *data = [NSData dataWithBytesNoCopy:bufferView.buffer.contents + bufferView.offset length:bufferView.length freeWhenDone:NO]; + const uint8_t* buffer = bufferView.buffer.contents + bufferView.offset; + NSData *data = [NSData dataWithBytesNoCopy:(void*)buffer length:bufferView.length freeWhenDone:NO]; - // TODO: identify jpg data by first 4 chars, hande with textureLoaderJpb - bool isJpg = false; + // identify jpg data by first 3 chars, handle with textureLoaderJpb + bool isJpg = buffer[0] == 0xFF && buffer[1] == 0xD8 && buffer[2] == 0xFF; if (isJpg) texture = [self.textureLoaderJpg newTextureWithData:data options:options error:&error]; diff --git a/gtlf/GLTFMTL/Source/GLTFMTLShaderBuilder.m b/gtlf/GLTFMTL/Source/GLTFMTLShaderBuilder.m index 47a6b125..91ec211c 100644 --- a/gtlf/GLTFMTL/Source/GLTFMTLShaderBuilder.m +++ b/gtlf/GLTFMTL/Source/GLTFMTLShaderBuilder.m @@ -68,7 +68,7 @@ @implementation GLTFMTLShaderBuilder pipelineDescriptor.vertexDescriptor = vertexDescriptor; pipelineDescriptor.colorAttachments[0].pixelFormat = colorPixelFormat; - pipelineDescriptor.sampleCount = sampleCount; + pipelineDescriptor.rasterSampleCount = sampleCount; if (submesh.material.alphaMode == GLTFAlphaModeBlend) { pipelineDescriptor.colorAttachments[0].blendingEnabled = YES; diff --git a/gtlf/GLTFMTL/kram.xcconfig b/gtlf/GLTFMTL/kram.xcconfig new file mode 120000 index 00000000..66b528eb --- /dev/null +++ b/gtlf/GLTFMTL/kram.xcconfig @@ -0,0 +1 @@ +../../build2/kram.xcconfig \ No newline at end of file diff --git 
a/hlslparser/CMakeLists.txt b/hlslparser/CMakeLists.txt new file mode 100644 index 00000000..e4c173dd --- /dev/null +++ b/hlslparser/CMakeLists.txt @@ -0,0 +1,108 @@ +#----------------------------------------------------- +# hlslparser + +# now setup the app project +set(myTargetApp hlslparser) +project(${myTargetApp}) +add_executable(${myTargetApp}) + +#----------------------------------------------------- + +if (BUILD_MAC) + set_target_properties(${myTargetApp} PROPERTIES + XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++20" + XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" + XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx2" + + # turn off exceptions/rtti + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO + + # can't believe this isn't on by default in CMAKE + XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + + #------------------------- + + # set debug style for apps + XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym" + XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO" + + #------------------------- + # app specific settings + + # this drops app from 762KB to 174KB with only ATE enabled + # note about needing -gfull instead of -gused here or debug info messed up: + # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" + + #------------------------- + # for now disable signing, and just "sign to run locally" + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.hialec.hlslparser" + XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO" + XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "" + ) + + target_compile_options(${myTargetApp} PRIVATE -W -Wall) + +endif() + +#----------------------------------------------------- + +if (BUILD_WIN) + # When Win rebuilds library, it doesn't relink app to correct code when you + # build the app target project. 
Breakpoints stop working after any library source edit, + # and you have to rebuild solution to get the correct code to execute. Since 2014. Try this. + # And BUILD_ALL never launches properly. + # https://cmake.org/pipermail/cmake/2014-October/058798.html + SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) + SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}) + + # TODO: switch to add_target_definitions + + # to turn off exceptions/rtti use /GR and /EHsc replacement + string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + # don't need force with apps, since they only access kram folder files which include KramConfig + + # all warnings, AVX2, and multiprocess compiles, + # eliminate duplicate strings, embed full path + target_compile_options(${myTargetApp} PRIVATE /W3 -march=haswell -mf16c -mfma /MP /GF /FC) + + # fix STL (don't use -D here, will remove) + target_compile_definitions(${myTargetApp} PRIVATE _ITERATOR_DEBUG_LEVEL=0 _HAS_EXCEPTIONS=0) + # /INCREMENTAL and /OPT:REF are linker flags, and build types are strings (EQUAL only compares numbers); NOTE(review): CMAKE_BUILD_TYPE is normally empty with multi-config generators - confirm + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + target_link_options(${myTargetApp} PRIVATE "/INCREMENTAL") + + elseif (CMAKE_BUILD_TYPE STREQUAL "Release") + # only dead strip on Release builds since this disables Incremental linking, may want Profile build that doesn't use this + target_link_options(${myTargetApp} PRIVATE "/OPT:REF") + + # other possibilities + # /GL - whole program optimization + # /Gy - edit and continue with function level linking (no clang) + # /Oi - enable intrinsic functions + + endif() + +endif() + + +#------------------ + +file(GLOB_RECURSE appSources CONFIGURE_DEPENDS + "${PROJECT_SOURCE_DIR}/*.cpp" + "${PROJECT_SOURCE_DIR}/*.h" +) + +source_group(TREE "${PROJECT_SOURCE_DIR}" PREFIX "source" FILES ${appSources}) + +target_include_directories(${myTargetApp} PRIVATE + "${PROJECT_SOURCE_DIR}" +) + +target_sources(${myTargetApp} PRIVATE ${appSources}) + + diff --git a/hlslparser/LICENSE 
b/hlslparser/LICENSE new file mode 100644 index 00000000..7439620b --- /dev/null +++ b/hlslparser/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2013-2014 Unknown Worlds Entertainment, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/hlslparser/README.md b/hlslparser/README.md new file mode 100644 index 00000000..b4093f19 --- /dev/null +++ b/hlslparser/README.md @@ -0,0 +1,460 @@ +HLSLParser +========== + +This version of thekla/hlslparser takes a HLSL2021 syntax that then converts that into modern HLSL and MSL. Special thanks to Max McGuire (@Unknown Worlds) and Ignacio Castano and Jonathan Blow (@Thekla) for releasing this as open-source. I've left out GLSL and DX9 and FX legacy codegen to simplify maintaining the codebase. This is a small amount of code compared with the Krhonos shader tools. + +There are still growing pains to using hlslparser. 
It can't do all the manipulation that glslc and spirv-cross do to ensure that a valid shader model shader is created. But compiling with DXC/metal should help avoid issues. I still don't know how to resolve reflection, since each compiler generates its own unique data formats. Reflection is simpler on the spirv path. + +The point of this hlslparser is to preserve comments, generate MSL/HLSL code close to the original sources, and be easy to extend. MSL and HLSL are nearly the same shader language at the core. Typical spirv to MSL transpiles look assembly-like in code flow. spirv-opt and spirv-cross introduce 100's of temp registers into the code, gen 24 character floats, strip comments, can't translate half samplers, and the resulting code isn't simple to step through in Metal GPU capture. At the same time, Apple ignores generating Spirv from MSL, so here we are. Spirv should remain a final assembly format to feed to Vulkan drivers. + +Original sources found here. See for more details: + +https://github.com/Thekla/hlslparser +https://github.com/unknownworlds/hlslparser + +--------------------------------- + +Paths to turn HLSL and SPV into MSL + +* HLSL2021 > hlslparser > HLSL2021 > dxc > SPV +* HLSL2021 > hlslparser > MSL > metal > AIR(metallib) +* +* Reflection: spv > spv-reflect -> refl +* HLSL2021 > dxc -> ? 
+* +* Transpiling MSL: HLSL2021 > dxc > SPV > spirv-cross > MSL +* Transpiling MSL: HLSL2021 > glslc > SPV > spirv-cross > MSL (fails on simple HLSL) +* +* Variant generation +* HLSL2021 + defines > preprocess > HLSL2021 +* HLSL2021 + specialization > hlslparser +* +* Note this has no shader sources in gpu capture, nor AIR files to accumulate +* HLSL2021 -> dxc -> DXIL -> metal-shaderconverter -> metallib + +--------------------------------- + +DONE +* u/int support +* u/short support, can't interpolate in HLSL +* SSBO support +* compute shader support +* HLSL2021 style syntax +* chop out FX and GLSL support, can use spirv-cross for GLSL +* split sampler and texture +* handle depth textures +* compile HLSL with DXC to SPV +* compile MSL with metalc to AIR/metallib +* u/int64_tN support +* double support - not in MSL, can't interpolate vs/ps must pack to uint +* RWTexture (needs ops) +* Vulkan push constants in HLSL +* fix static constant handling + +TODO: +* include handling +* atomics +* more than half/float/int literals (f.e. u/int, u/long), requires trailing U, L +* passing variables only by value in HLSL vs. value/ref/ptr in MSL +* argument buffers and descriptor sets (root tables for DX?) +* halfio/2/3/4 type for Nvidia/Adreno, halfst2/3/4 for storage +* specialization/function constants for variants (MSL/SPV only) +* numgroups designator for DX kernel +* ray-tracing kernels +* tile shader kernels - may be MSL and Android SPV specific +* triangulation shaders (geom, mesh, hull/etc) - platform specific +* generate reflection data from parse of HLSL +* handle reflection (spirv-reflect?) +* handle HLSL vulkan extension constructs, convert these to MSL kernels too +* preprocessor for handling includes, platform specifics, and variants +* fix shaders to not structify metal and mod the source names, turn on written, currently handling globals. Could require passing elements from main shader. 
+* poor syntax highlighting of output .metal file, does Xcode have to compile? +* no syntax highlighting of .hlsl files in Xcode, but VSCode has HLSL but not MSL + +Shader Editor +* Xcode has the worst syntax highlighting of any IDE with an undocumented plugin api that is constantly broken by newer versions of Xcode. Highlighting only works if files are compiled by IDE, but using CLI tool. +* May want to switch to VSCode for shader development +* Also Windows VS2022 has HLSL add-on from Tim Jones, but this doesn't work with VS on macOS +* https://marketplace.visualstudio.com/items?itemName=doublebuffer.metal-shader&utm_source=VSCode.pro&utm_campaign=AhmadAwais + +--------------------------------- + +Overview +--- + +|Code | | +|--------------|-------------------| +|HLSLTokenizer | produces HLSLTree | +|HLSLParser | produces HLSLTree | +|HLSLTree | AST tree of the source HLSL | +||| +|MSLGenerator | convert HLSLTree to MSL | +|HLSLGenerator | to a DX10-style HLSL | +|GLSLGenerator | unsupported | +||| +|Engine | some string and log helpers | +||| +|ShaderMSL.h | macros/functions to translate to MSL | +|ShaderHLSL.h | to HLSL | +||| +|buildShaders | build hlslparser first, and update ShaderMSL/HLSL.h files, then run this script. Runs hlslparser to generate MSL/HLSL, then runs that through DXC/Metal compiler. 
| + +|Apps | | +|--------------|-------------------| +|hlslparser | convert dx9 style HLSL to DX10 HLSL and MSL | +|DXC | Microsoft's open-source compiler, gens HLSL 6.0-6.6 DXIL, and spv1.0-1.2, clang-based optimizer, installed with Vulkan SDK | +|glslc | Google's wrapper to glslang, preprocessor, reflection, see below | +|glslang | GLSL and HLSL compiler, but doesn't compile valid HLSL half code, only SM 5.1 | +|spirv-opt | spv optimizer | +|spirv-cross | transpile spv to MSL, HLSL, and GLSL, but codegen has 100's of temp vars, no comments, can target specific MSL/HLSL models | +|spirv-reflect | gens reflection data from spv file | +|metal-shaderconverter | compile dxil to metallib | + +https://github.com/microsoft/DirectXShaderCompiler +https://github.com/KhronosGroup/SPIRV-Cross +https://developer.apple.com/metal/shader-converter/ +https://github.com/google/shaderc + +Dealing with Half +--- +HLSL2021 6.2 includes full half and int support. So that is the compilation target. Note table below before adopting half in shaders. Nvidia/AMD tried to phase out half support on DX10 era, but iOS re-popularized half usage. Android and Nvidia have many dragons using half (see below). Half is only 10-bit mantissa with 5-bit exponent. Even the Mali table below is only all full on more recent hardware. + +| Platforms | iOS/PowerVR, Mali, Adreno | Nvida, AMD, Intel | +| Feature | I | M | A | | N | A | I | +|----------------|---|---|---|-|---|---|---| +| Half Interp | y | y | n | | n | y | y | +| Half UBO | y | y | n | | y | y | y | +| Half Push | y | y | y | | y | n | y | +| Half ALU | y | y | y | | y | y | y | + +* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_16bit_storage.html + +* AMD has no fp16 div/exp on Vega, constant buffer should use fp16. 
+* https://gpuopen.com/learn/first-steps-implementing-fp16/ + +* StorageBuffer16BitAccess +* UniformAndStorageBuffer16BitAccess +* StoragePushConstant16 +* StorageInputOutput16 + +* There is also the limitation of half interpolation creating banding, which is likely why Adreno/Nvidia do not support StorageInputOutput16. Mali recommends using half to minimize parameter buffer storage out of the vertex shader, but then declaring float for the same variables in the fragment shader. This limits sharing input/output structs. + +* Adreno also doesn't support half storage, so this limits SSBO and UBO usage. + +* macOS on M1 - Rosetta2 lacks AVX and f16c cpu support, so translated x64 apps crash. Build for Apple Silicon to fix this. Win on ARM emulation (Qcom X Elite) also has the same limitations. Neon is 16 128-bit registers where AVX needs 16 256-bit registers. + +* Android missing cpu arm64+f16 support from Redmi Note 8 and other chips. + vcvt_f32_f16 is still present without this. Do math in fp32x4, then convert to fp16x4. + +Dealing with Double +--- +* HLSL double support is a joke. Nvidia hobbles fp64 output to 1/16th or less of the fp32 performance on GeForce to sell Quadro for CAD. AMD is similar. +* Intel removed fp64 support in Gen11/12/13 and from ARC. +* HLSL only supports 3 ops in DX11.1 - div, rcp, fma. +* HLSL requires touint and todouble to pass between shader stages +* MSL has no fp64 support + +Dealing with uchar4 +--- +* No vertex formats to srgb degamma uchar4 colors, only texture unit has this. +* Compute can't do srgb gamma due to bypass of ROP units +* HLSL lacks uchar type, and only has pack/unpack ops in 6.6 +* Hard to use with SSBO despite uint32 chunks. +* D3DColorToUBYTE4 has annoying bgra swizzle, so don't use it. + +Terms +--- +* Shader Variants - it's good to define which variants of shaders to generate. Can use static and dynamic branching to reduce variant count. Can lead to requiring shader source if can't predefine variant count. 
+* Specialization Constants - allow variants to be generated within a single shader. Spirv is marked and compiled based on these settings. Metal has equivalent function constants +* Tile shaders - kernels/fragment shaders that run at the tile level. Subpasses in Vulkan. tilegroup memory to and tile data passed from stage to stage without writing back to targets. + + +Mobile HW +--- + +Android were running 2-3w, now up to 6w from better heat spreaders. + +Tesselation differs on mobile vs. desktop. Mali does tesselation after VS (Android Extension Pack for ES). Metal does it before VS. +* https://developer.arm.com/documentation/100587/0100/tessellation/about-tessellation +* https://android.googlesource.com/platform/external/deqp/+/refs/heads/master/external/vulkancts/modules/vulkan/tessellation/vktTessellationGeometryPointSizeTests.cpp +* https://metalbyexample.com/tessellation/ + + +Mali +* TBDR +* Vulkan gpu - Midgard, Bifrost, Valhall, Immortalis +* Vulkan can read but not write SSBO in VS. VS then depends on PS output. Use CS. 
+* Sparse index buffer limits +* 180MB parameter buffer limit - device lost after exceeded +* Missing VK_POLYGON_MODE_LINE (feature.fillModeNonSolid) - affects debug visuals +* Missing shaderClipDistance and shaderCullDistance +* ARM licenses mobile cpu reference designs +* ARM bought Mali gpu from Falanx Microsystems +* Sets maxDrawIndirectCount = 1, limiting MDI utility +* Raytracing and VRS on Immortalis +* ETC2/ASTC, created ASTC format and encoders, no BC +* https://en.wikipedia.org/wiki/Mali_(processor) +* https://community.arm.com/support-forums/f/graphics-gaming-and-vr-forum/53672/vulkan-what-should-i-do-about-this-warning-bestpractices-vkcmddrawindexed-sparse-index-buffer +* https://developer.samsung.com/galaxy-gamedev/gamedev-blog/infinitejet.html + +Adreno +* TBDR +* Occlusion queries can cause a switch from TBDR to IMR +* Half shader limits from lack of fp16 storage/inputOuptut +* Qualcomm bought ATI Radeon mobile gpu +* 8K constant cache for UBO +* SSBO bypass 16K uniform cache +* lacks baseInstance support, indirectDrawFirstInstance = false on 4x0/5x0, limiting MDI utility +* ETC2/ASTC, no BC + +PowerVR +* TBDR +* Very little US adoption +* Imagination absorbed into Chinese state tech conglomerate +* Origin of Apple Silicon +* PVRTC/ETC2/ASTC, no BC + +iOS +* TBDR +* locked at GL/ES 3.0 - no compute, use Metal/MSL instead +* A7 - 2/0 cpu, ETC2, 1gb device, Metal introduced and ES3 +* A8 - 2/0, ASTC +* A9 - 2/0 2gb, limited MDI, cpu ICB, baseVertex, baseInstance +* A10 - 2/0, +* A11 - 2/2, gpu ICB, tile shaders, Raster Order Groups, MSAA improvements, more gpus, async compute/raster +* A12 - 2/4, +* A13 - Argument Buffer indirection for material indexing, sparse texturing +* A14/M1 - lossy FBO compression, mesh shaders, +* A15/M2 - no new gpu hw +* A16/M3? 
- rumored RT hw +* No SamplerMinMax support + +* https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf + +macOS +* IMR (Intel), TBDR(M1/A14), TBDR(A15/M2) +* locked at GL4.1 - no compute, clipControl, BC6/7, SSBO, dsa, error callbacks +* M1 has BC texture support, iPad/iPhone still do not +* Intel only has BC support, but architecture is being phased out +* Can use iOS tile shading on M1/M2, may work on last gen Intel?, last Intel RDNA and not RDNA2 +* M1 can only run Windows ARM in Parallels VM +* Parallels Intel can't run DX12/Vulkan, only DX11 +* Bootcamp can't run eGPU, but macOS can +* M1 can run iOS applications natively (but not ARKit) + +Intel +* Skylake - improved fp16 support, sparse texturing +* Alderlake - removes AVX512, 8 HT big, 8 Little = 24 threads (really 16 cores) + +AMD +* scalar execution instead of vector based, compute, unified ALUs for rasterization +* create Mantle API which lead to Vulkan +* GCN1 - wave64, 1 instr/4 cycles, 1 cu = 4 simd16 units +* GCN2 - Puma, PS4(Liverpool)/Xbone(Durango), sparse texturing, 2 async compute + 1 raster/compute pipe +* GCN3 - lossless DCC, high quality video scalar, video encoder/decoder +* GCN4 - Polaris, PS4 Pro(Neo)/XboneX(Scorpio), checkerboarding, degen tri removal, fp16 added back, little point to fp16 +* GCN5 - Vega, fp16 2x, fp64 is 1/16 typically, mesh shaders +* +* RDNA - RX5300, wave32 (or 64), 1 instr/cycle, 2cpu = 1 wgp, mesh shaders, wave32 = 1 simd32 unit, display compression, +* RDNA2 - PS5/XboxX/SteamDeck, RX6000, RT hw, FSR +* RDNA3 - RX7000, better compute/RT hw, multichip module design, +* +* https://en.wikipedia.org/wiki/Graphics_Core_Next +* https://gpuopen.com/wp-content/uploads/2017/03/GDC2017-Advanced-Shader-Programming-On-GCN.pdf +* https://en.wikipedia.org/wiki/RDNA_(microarchitecture) +* https://en.wikipedia.org/wiki/List_of_AMD_graphics_processing_units + +Nvidia +* scalar execution instead of vector based, compute, unified ALUs for rasterization +* tile-based 
raster/binning in Maxwell (Tegra X1), not on same level as TBDR +* fp16 rates are the same as fp32 on 30x0/40x0, indicating little fp16 support +* but fp16 rates are double on 10x0/20x0 +* 1080 runs fp16 at 1/128th the speed of fp32 - ugh! To hobble ML work +* on GeForce cards and push expensive Quadro. +* Also double was hobbled. +* +* Tegra X1+ - Mariko, Nintendo Switch, ended chip production in 2021 +* 10x0 - 1/64x fp16, +* 16x0 - 2x fp16, +* 20x0 - 2x fp16, tensor cores, RT accel for triangle and bvh intersection +* 30x0 - 1x fp16, 36 fp32 vs. 0.6 fp64 Teraflops on 3090 (60x), faster RT/Tensor cores +* 40x0 - 1x fp16, faster RT/Tensor cores + +* https://en.wikipedia.org/wiki/List_of_Nvidia_graphics_processing_units +* https://www.pcmag.com/news/report-nvidia-to-end-production-of-nintendo-switch-tegra-chip + +Graphics APIs +--- +Metal +* Apple macOS/iOS only API. Now on version 3.0 +* Just works if hw supports. Easy to develop gpu algorithms. +* Argument buffer simplified in Metal 3. 
+ +Vulkan +* derived from AMD Mantle by Khronos to generalized API +* super complex low-level API making dev deal with layout, barriers, and lots of gpu garbage +* complex to enable features and extensions in api and in shaders +* Android 8/9 Vulkan drivers often locked to Vulkan 1.0 without no updates +* better on Windows/Linux where AMD/Intel/Nvidia update drivers +* cryptic validation errors improvement over glGetError() +* https://www.khronos.org/files/vulkan10-reference-guide.pdf +* https://vkguide.dev/ +* https://github.com/SaschaWillems/Vulkan +* https://vulkan.gpuinfo.org/ + +DX9-DX12 +* Microsoft Win only API +* DX12 went super low level for performance +* DX11 is a very nice api with shader stages and compute +* DX11 runs atop Parallels VM (which runs atop Metal) + +OpenGL +* SGI developed IrisGL then OpenGL, 30 years ago, +* stopped at GL4.6 on desktop, GL4.1 on macOS +* Shipped with GLSL +* ES mobile variants, better with extensions +* WebGL lowest common denominator browser variants, extensions often not implemented + +WebGPU +* Metal/DX12/Vulkan lowest common denominator API for browser +* extensions to handle platform specifics +* using WGSL + +Shading Languages +--- + +CG +* Where it all started. C like, multi-entrypoint. 
+* Nvidia has abandoned this +* Unity was using it but moved to HLSL +* https://developer.nvidia.com/cg-toolkit + +MSL +* C++14, has ctors, cg/hlsl like, multi-entrypoint +* buffers, textures, tile shaders, atomic int +* Can only sample using float uv, likely to avoid banding/precision issues +* +* https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf + +HLSL +* C++ like, missing ctors, no virtuals, limited op overload, cg derived for original Xbox, multi-entrypoint +* uses mul instead of operator for vec/matrix math +* Added back u/short, half support in SM 6.2 +* double requires special packing +* u/char4 doesn't exist, only int/short/uint64, uint64 is hw limited +* Vulkan extensions for specialization constants and subpasses +* SSBO - StructuredBuffers, ByteAddressBuffer +* HLSL 2021 (latest) can distinguish structs with same types as unique +* HLSL added to clang +* https://github.com/Microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst#subpass-inputs +* https://clang.llvm.org/docs/HLSL/HLSLSupport.html#:~:text=HLSL%20uses%20templates%20to%20define,case%20and%20issues%20a%20diagnostic. +* https://devblogs.microsoft.com/directx/announcing-hlsl-2021/ + +Shader Assembly Formats + +SPIRV +* generated by DXC and glslang (glslc) for Vulkan to use +* assembly level constructs that can be transpiled back to other shading languages +* really difficult to read +* clang optimizer +* linked into module +* cannot represent Texture2D, so can't transpile to MSL texture2d +* OpTypeImage must be fp32, i32/i64 format only in 1.2 spec. +* https://github.com/microsoft/DirectXShaderCompiler/issues/2711 +* https://www.khronos.org/spir/ + +AIR +* Apple IR format +* clang optimizer +* don't know of human readable version +* linked into metallib + +DXIL +* generated by DXC +* used by DX12 +* lacks specialization constant support +* clang optimizer + + +Shading languages not supported directly by hlslparser, but can transpile via spirv-cross or tint. 
+ +FX +* wrapper around shaders, passes, state +* Microsoft abandoned this +* hlslparser has code to read pass/technique/etc. +* https://github.com/microsoft/FX11 +* https://learn.microsoft.com/en-us/windows/win32/direct3d11/d3d11-effect-format + +GLSL +* dead shader language at GL 4.6, Vulkan/Microsoft will pursue HLSL +* bolted on extensions with Apple, AMD, Nvidia, Intel extending language +* has extension mechanism +* replaced with spirv +* horrible glGetError() requires sync of cpu/gpu +* not multi-entrypoint, each entrypoint called main() +* locked at GL4.1 on macOS - no compute + +GLSL/ES +* even more limited GLSL +* 3.0 on iOS, now emulated by Metal, +* Khronos support ends at 3.1, moved to Vulkan/spirv +* precision modifiers in ES for lowp (no support), mediump (might be fp16, fp24, fp32), highp (fp24 or fp32) +* replaced with spirv +* defaults needlessly removed from uniforms +* dot or .0 required on all floating point numbers or shader fails to compile, int vs. float +* line directives needlessly changed from GLSL + +GLSL/ES (WebGL) +* imposed mobile limits on desktop - browsers mostly running gpu on desktop parts, despite Apple adding ES2/3 extensions +* versions ES2 (WebGL1), ES3 (WebGL2), ES3.1 (WebGL2.1) +* lowest common denominator +* wrote Figma using WebGL1, shaders rendering 2d vectors with Photoshop-style blend modes/masks, images +* WebGL1 limited ES2 even more - npot 2d texture limits, no 3d textures, srgb missing for compressed texture, extension not on Safari, no fp64, no dynamic loops in shaders +* WebGL2/2.1 - limited ES3/3.1, added srgb, +* fixed count loops, can break out of loop with uniform +* WebGL2.1(ES3.1) can run compute, but not on Apple platforms +* now emulated by Metal. Angle emulates atop DX11 instead of GL for driver stability, might also have Vulkan backend. 
+* runs all gpu code in separate browser process, so has to marshall all data over +* can't map/unmap, have to upload changes to buffer/texture +* rewrites shader names and code internally into unreadable mess. Complicates stepping through shader in gpu captures. +* https://www.khronos.org/webgl +* https://webglreport.com/ +* https://www.khronos.org/files/webgl/webgl-reference-card-1_0.pdf +* https://www.khronos.org/files/webgl20-reference-guide.pdf + +WGSL (WebGPU) +* WebGPU shading language originally meant as text form of spirv +* full compute support +* now using Dart like syntax completely unlike CG origin of other languages +* avoids pointers/references +* can transpile spirv to WGSL via tint, WGSL still not in spirv-cross +* converts WGSL back to spirv. +* supposedly Apple didn't want to require spirv. +* similar in api syntax to Metal/Vulkan/DX12 +* https://www.w3.org/TR/webgpu/ +* https://www.w3.org/TR/WGSL/ + +WHLSL (WebGPU) +* WebGPU dropped proposed language that would have been an offshoot of HLSL syntax. +* https://webkit.org/blog/8482/web-high-level-shading-language/ + +--------------------------------- + +Shading Language Versions + +HLSL +* SM 6.2, target, added back int/half support +* SM 6.6, added 8-bit pack ops +* SM 6.7, SampleCmpLevel, RWTexture2DMS, RWTexture2DMSArray, signed texture offsets +* https://microsoft.github.io/DirectX-Specs/d3d/HLSL_SM_6_7_Advanced_Texture_Ops.html + +MSL +* metal2.2, iOS13/macOS10.15, A8 min +* metal2.3, iOS14/macOS11, target, function pointers, A9 min +* metal2.4, iOS15/macOS12, +* metal3.0, iOS16/macOS13, unified shader model, latest, + +Spirv +* 1.1, vulkan1.0 +* 1.3, vulkan1.1 +* 1.5, vulkan1.2, target, diff --git a/hlslparser/build.ninja b/hlslparser/build.ninja new file mode 100755 index 00000000..94d067b0 --- /dev/null +++ b/hlslparser/build.ninja @@ -0,0 +1,179 @@ + +# Note: need absolute paths for Xcode for error/warning clickthrough +# ugh that ninja doesn't have path or wildcard support. 
+# but coding by hand for now is better than dealing with cmake ninja + +# some handy commands +# ninja -t clean +# ninja -t rules -d +# ninja -t targets + +# ninja_required_version = 1.5 +# include rules.ninja + + +homeDir = /Users/Alec/devref/ + +vulkanSDK = ${homeDir}vulkansdk/1.3.275.0/macOS/bin/ +projectDir = ${homeDir}kram/hlslparser/ +appBuildDir = ${projectDir}/build/hlslparser/Build/Products/Release/ + +hlslparser = ${appBuildDir}hlslparser + +# this is annoying that ninja doesn't cache app modstamps +#rule cleanBuild +# command = ninja -t clean +# description = Clean Build + +# this doesn't have anything to compare, need app timestamps +# also couldn't add to dependsMSL or dependsHLSL +#build clean: cleanBuild | ${hlslparser} + +#------ + +metalCompile = xcrun -sdk macosx metal +metalLib = xcrun -sdk macosx metallib +metalLibStrip = xcrun -sdk macosx metal-dsymutil + +# src files +srcDir = ${projectDir}shaders/ + +# headers and parser gen shaders +#intDir = ${projectDir}outshaders/ + +# compiled shader per platform +# 3.0, 2.4 is first version to support companion sym +dstDir = ${projectDir}out/mac/ +flagsMSL = -std=metal3.0 -Werror -I ${srcDir} + +dependsMSL = ${srcDir}ShaderMSL.h + +# for iOS +#dstDir = ${projectDir}out/ios/ +#flagsMSL = -std=metal3.0 -Werror -I ${srcDir} + +# turn on file/line directives to jump back to original sources +# and turn on comments in generated sources +flagsParser = -g -line +#flagsParser = -g + +# turn on debug, makes metallib 3x bigger and not optimized +#flagsMSLDebug = -g + +# this allows Xcode to load the sym file, don't use with 2.3, or the file is giant. 
+flagsMSLDebug = -frecord-sources +# flagsMSLDebug = + +# hlslparser msl codegen +rule genMSL + command = $hlslparser $flagsParser -i $in -o $out + description = Gen MSL + +# compile to .air +rule compileMSL + command = $metalCompile $flagsMSLDebug $flagsMSL -c $in -o $out + description = Gen Air + +# https://developer.apple.com/documentation/metal/shader_libraries/compiling_shader_code_into_a_library_with_metal_s_command-line_tools?language=objc +# linker to metallib +rule linkMSL + command = $metalLib -o $out $in + description = Link metallib + +# strip metallib and gen metallibsym (only for SM2.4+) +rule stripMSL + command = $metalLibStrip -flat -remove-source $in + description = Gen metallibsym + + +# gen msl +build ${dstDir}Skinning.metal: genMSL ${srcDir}Skinning.hlsl | ${dependsMSL} +build ${dstDir}Sample.metal: genMSL ${srcDir}Sample.hlsl | ${dependsMSL} +build ${dstDir}Compute.metal: genMSL ${srcDir}Compute.hlsl | ${dependsMSL} + +# this avoids recompilation when there are a lot of files +# but it does mean more compile calls to gen the one metallib and sym +build ${dstDir}Skinning.air: compileMSL ${dstDir}Skinning.metal +build ${dstDir}Sample.air: compileMSL ${dstDir}Sample.metal +build ${dstDir}Compute.air: compileMSL ${dstDir}Compute.metal + +# gen metallib (87K) +build ${dstDir}GameShaders.metallib: linkMSL ${dstDir}Skinning.air ${dstDir}Sample.air ${dstDir}Compute.air + +# this is getting run every time since it strips the metallib, how to prevent that +# also can compile all metal files into both metallib and dsym +# this also doesn't make metallib any smaller, but is supposed to strip it +# goes from 299K down to 273K, but seems like it should go smaller +# with no debug flags, 87K and sym is 80K w/o debug, prob should skip gen of sym file +# if not in debug + +# sources are still embedded in the 87K version + +# gen metallibsym +build ${dstDir}GameShaders.metallibsym: stripMSL ${dstDir}GameShaders.metallib + +#------- + +dxc = ${vulkanSDK}dxc +
+dependsHLSL = ${srcDir}ShaderHLSL.h +dstDirHLSL = ${projectDir}out/android/ + +# turn on debug +# flagsDXDebug = -Zi +flagsDXDebug = + +flagsDXC = -nologo -Zpc -enable-16bit-types -HV 2021 -Werror -fspv-extension=SPV_KHR_shader_draw_parameters -spirv -fspv-target-env=vulkan1.3 -I ${srcDir} + +flagsVS = -T vs_6_2 +flagsPS = -T ps_6_2 +flagsCS = -T cs_6_2 + +# hlslparser hlsl/msl codegen +rule genHLSL + command = $hlslparser $flagsParser -i $in -o $out + description = Gen HLSL + +# this will get shadow replaced +entryPoint = Foo + +# compile to spirv +rule compileVS + command = $dxc $flagsDXC $flagsVS -E ${entryPoint}VS -Fo $out $in + description = Gen spirv VS + +rule compilePS + command = $dxc $flagsDXC $flagsPS -E ${entryPoint}PS -Fo $out $in + description = Gen spirv PS + +rule compileCS + command = $dxc $flagsDXC $flagsCS -E ${entryPoint}CS -Fo $out $in + description = Gen spirv CS + +rule archiveSpriv + command = zip -r $out $in + description = Gen Archive + +# gen hlsl +build ${dstDirHLSL}Skinning.hlsl: genHLSL ${srcDir}Skinning.hlsl | ${dependsHLSL} +build ${dstDirHLSL}Sample.hlsl: genHLSL ${srcDir}Sample.hlsl | ${dependsHLSL} +build ${dstDirHLSL}Compute.hlsl: genHLSL ${srcDir}Compute.hlsl | ${dependsHLSL} + +# gen spv +build ${dstDirHLSL}Skinning.vert.spv: compileVS ${dstDirHLSL}Skinning.hlsl + entryPoint = Skinning +build ${dstDirHLSL}Skinning.frag.spv: compilePS ${dstDirHLSL}Skinning.hlsl + entryPoint = Skinning + +build ${dstDirHLSL}Sample.vert.spv: compileVS ${dstDirHLSL}Sample.hlsl + entryPoint = Sample +build ${dstDirHLSL}Sample.frag.spv: compilePS ${dstDirHLSL}Sample.hlsl + entryPoint = Sample + +build ${dstDirHLSL}Compute.comp.spv: compileCS ${dstDirHLSL}Compute.hlsl + entryPoint = Compute + +# zip spv to single archive (6k) +build ${dstDirHLSL}GameShaders.zip: archiveSpriv ${dstDirHLSL}Skinning.vert.spv ${dstDirHLSL}Skinning.frag.spv ${dstDirHLSL}Sample.vert.spv ${dstDirHLSL}Sample.frag.spv ${dstDirHLSL}Compute.comp.spv + + diff --git 
a/hlslparser/buildShaders.sh b/hlslparser/buildShaders.sh new file mode 100755 index 00000000..45a24422 --- /dev/null +++ b/hlslparser/buildShaders.sh @@ -0,0 +1,320 @@ +#!/bin/bash + +mkdir -p out + +mkdir -p out/mac +mkdir -p out/win +mkdir -p out/android +mkdir -p out/ios + +# display commands +set -x + +# note bash doesn't expand tilde inside quotes, so using HOME instead +# This only works if running from terminal, and not from Xcode +# so back to hardcoding the path. +vulkanSDK="${HOME}/devref/vulkansdk/1.3.275.0/macOS/bin/" +#vulkanSDK="" + +projectDir="${HOME}/devref/kram/hlslparser/" + +srcDir=${projectDir} +srcDir+="shaders/" + +includeDir=${srcDir} + +dstDir=${projectDir} +dstDir+="out/" + +#dstDirOut=${projectDir} +#dstDirOut+="out/" + +# this only pulls the release build, so testing debug won't update +appHlslparser=${projectDir}build/hlslparser/Build/Products/Release/hlslparser + +appDxc=${vulkanSDK} +appGlslc=${vulkanSDK} +appSpirvReflect=${vulkanSDK} +appSpirvCross=${vulkanSDK} + +# compilers +appDxc+="dxc" +appGlslc+="glslc" +appMetalMac="xcrun -sdk macosx metal" +# sdk doesn't seem to need to be iphoneos +appMetaliOS="xcrun -sdk macosx metal" + +# reflect/transpile spv +appSpirvReflect+="spirv-reflect" +appSpirvCross+="spirv-cross" + +# this has to be installed from online installer, this is v1.1 +appMetalShaderConverter="metal-shaderconverter" + +# TODO: also use the metal tools on Win to build +# and already have vulkan sdk + +# Xcode will only do clickthrough to warnings/errors if the filename +# is a full path. That's super annoying.
+ +parserOptions="" + +# preserve comments +parserOptions+="-g -line " + +# build the metal shaders - mac +pushd out/mac + +echo gen MSL +${appHlslparser} ${parserOptions} -i ${srcDir}Skinning.hlsl -o Skinning.metal +${appHlslparser} ${parserOptions} -i ${srcDir}Sample.hlsl -o Sample.metal +${appHlslparser} ${parserOptions} -i ${srcDir}Compute.hlsl -o Compute.metal + +popd > /dev/null + +# build the metal shaders - ios +pushd out/ios + +echo gen MSL +${appHlslparser} ${parserOptions} -i ${srcDir}Skinning.hlsl -o Skinning.metal +${appHlslparser} ${parserOptions} -i ${srcDir}Sample.hlsl -o Sample.metal +${appHlslparser} ${parserOptions} -i ${srcDir}Compute.hlsl -o Compute.metal + +popd > /dev/null + +# build the hlsl shaders - android +pushd out/android + +echo gen HLSL +${appHlslparser} ${parserOptions} -i ${srcDir}Skinning.hlsl -o Skinning.hlsl +${appHlslparser} ${parserOptions} -i ${srcDir}Sample.hlsl -o Sample.hlsl +${appHlslparser} ${parserOptions} -i ${srcDir}Compute.hlsl -o Compute.hlsl + +popd > /dev/null + +# build the hlsl shaders - win +pushd out/win + +echo gen HLSL +${appHlslparser} ${parserOptions} -i ${srcDir}Skinning.hlsl -o Skinning.hlsl +${appHlslparser} ${parserOptions} -i ${srcDir}Sample.hlsl -o Sample.hlsl +${appHlslparser} ${parserOptions} -i ${srcDir}Compute.hlsl -o Compute.hlsl + +popd > /dev/null + + +#------------------------------- + +# DONE: metal3.0 on M1 macOS13/iOS16 +# record sources into code for gpu capture (don't ship this), debug mode + +# O2 + size opt +# metalMacOptions+="-Os" + +testMetal=1 + +if [[ $testMetal -eq 1 ]]; then + # Metal is C++14 + metalMacOptions="-frecord-sources -g " + metalMacOptions+="-std=metal3.0 " + + # see if HLSL compiles to MSL (requires macOS Vulkan install) + + echo compile mac to metallib + ${appMetalMac} ${metalMacOptions} -I ${includeDir} -o ${dstDir}mac/GameShaders.metallib ${dstDir}mac/Skinning.metal ${dstDir}mac/Sample.metal ${dstDir}mac/Compute.metal + + + metaliOSOptions="-frecord-sources -g " 
+ metaliOSOptions+="-std=metal3.0 " + + echo compile iOS to metallib + ${appMetaliOS} ${metaliOSOptions} -I ${includeDir} -o ${dstDir}ios/GameShaders.metallib ${dstDir}ios/Skinning.metal ${dstDir}ios/Sample.metal ${dstDir}ios/Compute.metal +fi + + + +#------------------------------- + + +# looks like DXC wants a ps/vs/cs profile, so is expecting one shader per output + +args="-nologo " + +# if this is left out the transpiled MSL has no var names +# debug +args+="-Zi " + +# column matrices +args+="-Zpc " + +# enable half instead of min16float +# can't use for input/output on Adreno or Nvidia +# can't use in push constants on AMD +# also watch interpolation if using for input/output +args+="-enable-16bit-types " + +# default is 2018, but 2021 fixes casting rules of structs with same args +# https://devblogs.microsoft.com/directx/announcing-hlsl-2021/ +args+="-HV 2021 " + +args+="-fspv-extension=SPV_KHR_shader_draw_parameters " + +# 6.1 for ConstantBuffer +# 6.2 for u/short and half <- target +# 6.6 adds u/char8 pack/unpack calls +vsargs=${args} +vsargs+="-T vs_6_2 " + +psargs=${args} +psargs+="-T ps_6_2 " + +csargs=${args} +csargs+="-T cs_6_2 " + +#echo ${vsargs} +#echo ${psargs} + +# more HLSL garbage decoded +# https://therealmjp.github.io/posts/shader-fp16/ +# https://therealmjp.github.io/posts/dxil-linking/ + +# first gen dxil to see if HLSL is valid +# see this garbage here. Can only sign dxil on Windows. +# dxc only loads DXIL.dll on Windows +# https://www.wihlidal.com/blog/pipeline/2018-09-16-dxil-signing-post-compile/ +# no idea what format the refl file from dxil is? + +if [[ $testMetal -eq 1 ]]; then + + pushd out/win + + # echo gen DXIL with dxc + + # TODO: how to link metallib to single metallib? + # can this build to AIR, then build that into metallib? 
+ + # Note this isn't a win file + mscArgsVS="--vertex-stage-in --positionInvariance " + # --enable-gs-ts-emulation --vertex-input-layout-file= + + mscArgsPS=" " + + mscArgsMac="--minimum-gpu-family=Metal3 --deployment-os=macOS --minimum-os-build-version=13.0.0 " + mscArgsiOS="--minimum-gpu-family=Metal3 --deployment-os=iOS --minimum-os-build-version=16.0.0 " + + # build vert + ${appDxc} ${vsargs} -I ${includeDir} -E SkinningVS -Fo Skinning.vert.dxil -Fc Skinning.vert.dxil.txt -Fre Skinning.vert.refl Skinning.hlsl + + ${appMetalShaderConverter} ${mscArgsMac} ${mscArgsVS} Skinning.vert.dxil -o Skinning.vert.mac.metallib + ${appMetalShaderConverter} ${mscArgsiOS} ${mscArgsVS} Skinning.vert.dxil -o Skinning.vert.ios.metallib + + # build frag + ${appDxc} ${psargs} -I ${includeDir} -E SkinningPS -Fo Skinning.frag.dxil -Fc Skinning.frag.dxil.txt -Fre Skinning.frag.refl Skinning.hlsl + + ${appMetalShaderConverter} ${mscArgsMac} ${mscArgsPS} Skinning.frag.dxil -o Skinning.frag.mac.metallib + ${appMetalShaderConverter} ${mscArgsiOS} ${mscArgsPS} Skinning.frag.dxil -o Skinning.frag.ios.metallib + + popd > /dev/null +fi + +# TODO: add other shaders, but also switch to for loop? + + +# Optimization is also delegated to SPIRV-Tools. +# Right now there are no difference between optimization levels greater than zero; +# they will all invoke the same optimization recipe. That is, the recipe behind spirv-opt -O. +# -Os is a special set of options. 
Can run custom spirv optimizations via +# -Oconfig=--loop-unroll,--scalar-replacement=300,--eliminate-dead-code-aggressive + +# this outputs spv for android, then transpiles it to ios +pushd out/android + +echo gen SPIRV 1.3 with dxc + +${appDxc} ${vsargs} -spirv -fspv-target-env=vulkan1.3 -I ${includeDir} -E SkinningVS -Fo Skinning.vert.spv -Fc Skinning.vert.spv.txt Skinning.hlsl +${appDxc} ${psargs} -spirv -fspv-target-env=vulkan1.3 -I ${includeDir} -E SkinningPS -Fo Skinning.frag.spv -Fc Skinning.frag.spv.txt Skinning.hlsl + +${appDxc} ${vsargs} -spirv -fspv-target-env=vulkan1.3 -I ${includeDir} -E SampleVS -Fo Sample.vert.spv -Fc Sample.vert.spv.txt Sample.hlsl +${appDxc} ${psargs} -spirv -fspv-target-env=vulkan1.3 -I ${includeDir} -E SamplePS -Fo Sample.frag.spv -Fc Sample.frag.spv.txt Sample.hlsl + +${appDxc} ${csargs} -spirv -fspv-target-env=vulkan1.3 -I ${includeDir} -E ComputeCS -Fo Compute.comp.spv -Fc Compute.comp.spv.txt Compute.hlsl + +# -Fre not supported with spirv, so just use spirv-reflect +# either yaml or random format, why can't this just output json? 
+${appSpirvReflect} -y Skinning.vert.spv > Skinning.vert.refl +${appSpirvReflect} -y Skinning.frag.spv > Skinning.frag.refl +${appSpirvReflect} -y Sample.vert.spv > Sample.vert.refl +${appSpirvReflect} -y Sample.frag.spv > Sample.frag.refl +${appSpirvReflect} -y Compute.comp.spv > Compute.comp.refl + +popd > /dev/null + +# This needs spv from android for now to transpile +if [[ $testMetal -eq 1 ]]; then + + pushd out/ios + + #metaliOSOptions="-frecord-sources -g " + #metaliOSOptions+="-std=ios-metal2.4 " + + # transpile android spirv to ios MSL for comparison to what hlslparser MSL produces + # would never use this, would use hlslparser path directly or gen spirv + # specific for this target + spvDir=${dstDir}/android/ + + ${appSpirvCross} --msl --msl-version 30000 --msl-ios ${spvDir}Skinning.vert.spv --output Skinning.vert.metal + ${appSpirvCross} --msl --msl-version 30000 --msl-ios ${spvDir}Skinning.frag.spv --output Skinning.frag.metal + ${appSpirvCross} --msl --msl-version 30000 --msl-ios ${spvDir}Sample.vert.spv --output Sample.vert.metal + ${appSpirvCross} --msl --msl-version 30000 --msl-ios ${spvDir}Sample.frag.spv --output Sample.frag.metal + ${appSpirvCross} --msl --msl-version 30000 --msl-ios ${spvDir}Compute.comp.spv --output Compute.comp.metal + + # do includes survive transpile, why does this need -I ? + # compile to make sure code is valid + ${appMetaliOS} ${metaliOSOptions} -o GameShadersTranspile.metallib -I ${includeDir} Skinning.vert.metal Skinning.frag.metal Sample.vert.metal Sample.frag.metal Compute.comp.metal + + popd > /dev/null +fi + + +# DONE: need to group files into library/module +# also create a readable spv file, so can look through that + +# TODO: create reflect data w/o needing spirv + +# here are flags to use w/DXC + +# dxc can output reflection directly (only for DXIL) +# -Fre Output reflection to the given file + +# add reflect data to aid in generating reflection data +# -fspv-reflect + +# may not need this if doing dxil output,
then -Fo might gen dxil asm listing +# -Cc color-coded assembly listing + +# -remove-unused-functions Remove unused functions and types +# -remove-unused-globals Remove unused static globals and functions + + +# Negate SV_Position.y before writing to stage output in VS/DS/GS to accommodate Vulkan's coordinate system +# -fvk-invert-y + +# Reciprocate SV_Position.w after reading from stage input in PS to accommodate the difference between Vulkan and DirectX +# -fvk-use-dx-position-w + +# layout +# -fvk-use-gl-layout Use strict OpenGL std140/std430 memory layout for Vulkan resources +# -fvk-use-scalar-layout Use scalar memory layout for Vulkan resources + +# -Zpc Pack matrices in column-major order +# -Zpr Pack matrices in row-major order + +# -WX Treat warnings as errors +# -Zi Enable debug information + +# TODO: transpile with spirv-cross to WGSL, GLSL, etc off the spirv. + +# -enable-16bit-types Enable 16bit types and disable min precision types. Available in HLSL 2018 and shader model 6.2 +# -Fc Output assembly code listing file + +# this prints cwd if not redirected diff --git a/hlslparser/hlslparser.sln b/hlslparser/hlslparser.sln new file mode 100644 index 00000000..d3abff53 --- /dev/null +++ b/hlslparser/hlslparser.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 2013 +VisualStudioVersion = 12.0.40629.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hlslparser", "hlslparser.vcxproj", "{FAA5AD82-3351-479F-A315-F287EBD0A816}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {FAA5AD82-3351-479F-A315-F287EBD0A816}.Debug|Win32.ActiveCfg = Debug|Win32 + {FAA5AD82-3351-479F-A315-F287EBD0A816}.Debug|Win32.Build.0 = Debug|Win32 + {FAA5AD82-3351-479F-A315-F287EBD0A816}.Release|Win32.ActiveCfg = 
Release|Win32 + {FAA5AD82-3351-479F-A315-F287EBD0A816}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/hlslparser/hlslparser.vcxproj b/hlslparser/hlslparser.vcxproj new file mode 100644 index 00000000..a4f9f227 --- /dev/null +++ b/hlslparser/hlslparser.vcxproj @@ -0,0 +1,90 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + + {FAA5AD82-3351-479F-A315-F287EBD0A816} + hlslparser + + + + Application + true + v120 + MultiByte + + + Application + false + v120 + true + MultiByte + + + + + + + + + + + + + + + Level3 + Disabled + true + + + true + + + + + Level3 + MaxSpeed + true + true + true + + + true + true + true + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/hlslparser/hlslparser.vcxproj.filters b/hlslparser/hlslparser.vcxproj.filters new file mode 100644 index 00000000..79268e9a --- /dev/null +++ b/hlslparser/hlslparser.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/hlslparser/hlslparser.xcodeproj/project.pbxproj b/hlslparser/hlslparser.xcodeproj/project.pbxproj new file mode 100644 index 00000000..9a2e5bcf --- /dev/null +++ b/hlslparser/hlslparser.xcodeproj/project.pbxproj @@ -0,0 +1,352 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 56; + objects = { + +/* Begin PBXBuildFile section */ + 70235C4B29B3145200909C95 /* HLSLParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C3B29B3145200909C95 /* HLSLParser.cpp */; }; + 70235C4C29B3145200909C95 /* CodeWriter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C3C29B3145200909C95 /* CodeWriter.cpp */; }; + 70235C4D29B3145200909C95 /* HLSLGenerator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C3F29B3145200909C95 /* HLSLGenerator.cpp */; }; + 70235C4E29B3145200909C95 /* HLSLTree.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C4029B3145200909C95 /* HLSLTree.cpp */; }; + 70235C5029B3145200909C95 /* Engine.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C4329B3145200909C95 /* Engine.cpp */; }; + 70235C5129B3145200909C95 /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C4529B3145200909C95 /* Main.cpp */; }; + 70235C5229B3145200909C95 /* MSLGenerator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C4629B3145200909C95 /* MSLGenerator.cpp */; }; + 70235C5329B3145200909C95 /* HLSLTokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 70235C4829B3145200909C95 /* HLSLTokenizer.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 702A2B5729A49DC8007D9A99 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 70235C3A29B3145200909C95 /* Engine.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Engine.h; sourceTree = ""; }; + 70235C3B29B3145200909C95 /* HLSLParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HLSLParser.cpp; sourceTree = ""; }; + 70235C3C29B3145200909C95 
/* CodeWriter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CodeWriter.cpp; sourceTree = ""; }; + 70235C3E29B3145200909C95 /* HLSLTokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HLSLTokenizer.h; sourceTree = ""; }; + 70235C3F29B3145200909C95 /* HLSLGenerator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HLSLGenerator.cpp; sourceTree = ""; }; + 70235C4029B3145200909C95 /* HLSLTree.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HLSLTree.cpp; sourceTree = ""; }; + 70235C4229B3145200909C95 /* HLSLParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HLSLParser.h; sourceTree = ""; }; + 70235C4329B3145200909C95 /* Engine.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Engine.cpp; sourceTree = ""; }; + 70235C4429B3145200909C95 /* CodeWriter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CodeWriter.h; sourceTree = ""; }; + 70235C4529B3145200909C95 /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Main.cpp; sourceTree = ""; }; + 70235C4629B3145200909C95 /* MSLGenerator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MSLGenerator.cpp; sourceTree = ""; }; + 70235C4729B3145200909C95 /* MSLGenerator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MSLGenerator.h; sourceTree = ""; }; + 70235C4829B3145200909C95 /* HLSLTokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = HLSLTokenizer.cpp; sourceTree = ""; }; + 70235C4929B3145200909C95 /* HLSLGenerator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; path = HLSLGenerator.h; sourceTree = ""; }; + 70235C4A29B3145200909C95 /* HLSLTree.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = HLSLTree.h; sourceTree = ""; }; + 702A2B5929A49DC8007D9A99 /* hlslparser */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = hlslparser; sourceTree = BUILT_PRODUCTS_DIR; }; + 707D37DC29B97A3000B08D22 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 702A2B5629A49DC8007D9A99 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 70235C3929B3145200909C95 /* src */ = { + isa = PBXGroup; + children = ( + 70235C4429B3145200909C95 /* CodeWriter.h */, + 70235C3C29B3145200909C95 /* CodeWriter.cpp */, + 70235C3A29B3145200909C95 /* Engine.h */, + 70235C4329B3145200909C95 /* Engine.cpp */, + 70235C4A29B3145200909C95 /* HLSLTree.h */, + 70235C4029B3145200909C95 /* HLSLTree.cpp */, + 70235C4229B3145200909C95 /* HLSLParser.h */, + 70235C3B29B3145200909C95 /* HLSLParser.cpp */, + 70235C3E29B3145200909C95 /* HLSLTokenizer.h */, + 70235C4829B3145200909C95 /* HLSLTokenizer.cpp */, + 70235C4929B3145200909C95 /* HLSLGenerator.h */, + 70235C3F29B3145200909C95 /* HLSLGenerator.cpp */, + 70235C4729B3145200909C95 /* MSLGenerator.h */, + 70235C4629B3145200909C95 /* MSLGenerator.cpp */, + 70235C4529B3145200909C95 /* Main.cpp */, + ); + path = src; + sourceTree = ""; + }; + 702A2B5029A49DC8007D9A99 = { + isa = PBXGroup; + children = ( + 707D37DC29B97A3000B08D22 /* README.md */, + 70235C3929B3145200909C95 /* src */, + 702A2B5A29A49DC8007D9A99 /* Products */, + ); + sourceTree = ""; + }; + 702A2B5A29A49DC8007D9A99 /* 
Products */ = { + isa = PBXGroup; + children = ( + 702A2B5929A49DC8007D9A99 /* hlslparser */, + ); + name = Products; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 702A2B5829A49DC8007D9A99 /* hlslparser */ = { + isa = PBXNativeTarget; + buildConfigurationList = 702A2B6029A49DC8007D9A99 /* Build configuration list for PBXNativeTarget "hlslparser" */; + buildPhases = ( + 702A2B5529A49DC8007D9A99 /* Sources */, + 702A2B5629A49DC8007D9A99 /* Frameworks */, + 702A2B5729A49DC8007D9A99 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = hlslparser; + productName = hlslparser; + productReference = 702A2B5929A49DC8007D9A99 /* hlslparser */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 702A2B5129A49DC8007D9A99 /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastUpgradeCheck = 1410; + TargetAttributes = { + 702A2B5829A49DC8007D9A99 = { + CreatedOnToolsVersion = 14.1; + }; + }; + }; + buildConfigurationList = 702A2B5429A49DC8007D9A99 /* Build configuration list for PBXProject "hlslparser" */; + compatibilityVersion = "Xcode 14.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 702A2B5029A49DC8007D9A99; + productRefGroup = 702A2B5A29A49DC8007D9A99 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 702A2B5829A49DC8007D9A99 /* hlslparser */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 702A2B5529A49DC8007D9A99 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 70235C4E29B3145200909C95 /* HLSLTree.cpp in Sources */, + 70235C5129B3145200909C95 /* Main.cpp in Sources */, + 70235C5329B3145200909C95 /* HLSLTokenizer.cpp in Sources */, + 70235C5029B3145200909C95 /* Engine.cpp in Sources */, + 70235C4B29B3145200909C95 
/* HLSLParser.cpp in Sources */, + 70235C5229B3145200909C95 /* MSLGenerator.cpp in Sources */, + 70235C4C29B3145200909C95 /* CodeWriter.cpp in Sources */, + 70235C4D29B3145200909C95 /* HLSLGenerator.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 702A2B5E29A49DC8007D9A99 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_ATOMIC_IMPLICIT_SEQ_CST = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_CXX0X_EXTENSIONS = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_WARN__EXIT_TIME_DESTRUCTORS = NO; + COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + 
GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; + GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 11.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + }; + name = Debug; + }; + 702A2B5F29A49DC8007D9A99 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_ATOMIC_IMPLICIT_SEQ_CST = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_CXX0X_EXTENSIONS = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + 
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CLANG_WARN__EXIT_TIME_DESTRUCTORS = NO; + COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS = YES; + GCC_WARN_NON_VIRTUAL_DESTRUCTOR = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 11.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = macosx; + }; + name = Release; + }; + 702A2B6129A49DC8007D9A99 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + MACOSX_DEPLOYMENT_TARGET = 13.0; + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.hlslparser; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 702A2B6229A49DC8007D9A99 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + CODE_SIGN_STYLE = Manual; + DEVELOPMENT_TEAM = LDJ95E4NS8; + MACOSX_DEPLOYMENT_TARGET = 13.0; + PRODUCT_BUNDLE_IDENTIFIER = com.hialec.hlslparser; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 702A2B5429A49DC8007D9A99 /* Build configuration list for PBXProject "hlslparser" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 
702A2B5E29A49DC8007D9A99 /* Debug */, + 702A2B5F29A49DC8007D9A99 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 702A2B6029A49DC8007D9A99 /* Build configuration list for PBXNativeTarget "hlslparser" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 702A2B6129A49DC8007D9A99 /* Debug */, + 702A2B6229A49DC8007D9A99 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 702A2B5129A49DC8007D9A99 /* Project object */; +} diff --git a/hlslparser/hlslparser.xcworkspace/contents.xcworkspacedata b/hlslparser/hlslparser.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..3bb45bbf --- /dev/null +++ b/hlslparser/hlslparser.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,10 @@ + + + + + + + diff --git a/hlslparser/premake4.lua b/hlslparser/premake4.lua new file mode 100644 index 00000000..b51682c9 --- /dev/null +++ b/hlslparser/premake4.lua @@ -0,0 +1,18 @@ +solution "HLSLParser" + location "build" + configurations { "Debug", "Release" } + + project "HLSLParser" + kind "ConsoleApp" + language "C++" + files { "src/**.h", "src/**.cpp" } + + configuration "Debug" + targetdir "bin/debug" + defines { "DEBUG" } + flags { "Symbols" } + + configuration "Release" + targetdir "bin/release" + defines { "NDEBUG" } + flags { "Optimize" } \ No newline at end of file diff --git a/hlslparser/shaders/Compute.hlsl b/hlslparser/shaders/Compute.hlsl new file mode 100644 index 00000000..28744d31 --- /dev/null +++ b/hlslparser/shaders/Compute.hlsl @@ -0,0 +1,52 @@ +//-------------------------------------------------------------------------------------- +// File: Compute.hlsl +// +// This file contains the Compute Shader to perform array A + array B +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//-------------------------------------------------------------------------------------- + +#include "ShaderHLSL.h" + +// adapted from https://learn.microsoft.com/en-us/windows/win32/direct3d11/direct3d-11-advanced-stages-compute-create + +struct BufType +{ + int i; + float f; +}; + +StructuredBuffer Buffer0 : register(t0); +StructuredBuffer Buffer1 : register(t1); + +RWStructuredBuffer BufferOut : register(u2); + +// TODO: support numthreads designator +// [numthreads(1, 1, 1)] +void ComputeCS( uint3 tid : SV_DispatchThreadID ) +{ + BufferOut[tid.x].i = Buffer0[tid.x].i + Buffer1[tid.x].i; + BufferOut[tid.x].f = Buffer0[tid.x].f + Buffer1[tid.x].f; +} + +//------------------- + +// Need better way to search entry points, don't use filename +// Just search functions ending in VS/PS/CS +// [numthreads(1, 1, 1)] +// void ComputeCS( uint3 DTid : SV_DispatchThreadID ) +// { +// int i0 = asint( Buffer0.Load( DTid.x*8 ) ); +// float f0 = asfloat( Buffer0.Load( DTid.x*8+4 ) ); +// int i1 = asint( Buffer1.Load( DTid.x*8 ) ); +// float f1 = asfloat( Buffer1.Load( DTid.x*8+4 ) ); +// +// BufferOut.Store( DTid.x*8, asuint(i0 + i1) ); +// BufferOut.Store( DTid.x*8+4, asuint(f0 + f1) ); +// +// There is this new templated Load call. +// This greatly simplifies using BAB. +// float3 pos = Buffer0.Load(idx); +// Bufferout.Store(idx); +// } + diff --git a/hlslparser/shaders/Sample.hlsl b/hlslparser/shaders/Sample.hlsl new file mode 100644 index 00000000..ad22d004 --- /dev/null +++ b/hlslparser/shaders/Sample.hlsl @@ -0,0 +1,207 @@ +//********************************************************* +// +// Copyright (c) Microsoft. All rights reserved. +// This code is licensed under the MIT License (MIT). +// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY +// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR +// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. 
+// +//********************************************************* + +// from here https://github.com/microsoft/DirectX-Graphics-Samples/blob/master/Samples/Desktop/D3D12Multithreading/src/shaders.hlsl + +// TODO: also want to be able to share functions in a multientry point file +// so could move these out as static functions from the namespace in Metal. +// Right now if used, they get replicated into each class, but that handles +// any global use of texture+sampler/buffer. + +// Textures/samplers/buffers are globals to all shader within the file. +// But have to pair texture + sampler. In MSL, can combine +// into an argument buffer which holds all that. +// Vulkan has descriptor sets. + +#include "ShaderHLSL.h" + +Depth2D shadowMap : register(t0); +Texture2D diffuseMap : register(t1); +Texture2D normalMap : register(t2); + +SamplerState sampleWrap : register(s0); +SamplerComparisonState shadowMapSampler : register(s1); + +// #define didn't compile due to lack of preprocesor +static const int NUM_LIGHTS = 3; +//static const float SHADOW_DEPTH_BIAS = 0.00005; + +struct LightState +{ + float3 position; + float3 direction; + float4 color; + float4 falloff; + float4x4 viewProj; +}; + +struct SceneConstantBuffer +{ + float4x4 model; + float4x4 viewProj; + float4 ambientColor; + bool sampleShadowMap; + LightState lights[NUM_LIGHTS]; +}; + +// SM 6.1 +ConstantBuffer scene : register(b0); + +// no preprocessor to do this yet, so have to add functions +// can't seem to have overloads like this with same name +inline float4 mulr(float4 v, float4x4 m) { return mul(m,v); } +inline float3 mulr(float3 v, float3x3 m) { return mul(m,v); } +inline half3 mulr(half3 v, half3x3 m) { return mul(m,v); } + + +// TODO: also have this form, where can index into +// ConstantBuffer scene[10] : register(b0); + +// TODO: normal/tangent should be half3/4, but use 101010A2 in buffer +// but have to transform them by float4x4, so no point in declaring as half here +struct InputVS +{ + float3 
position : SV_Position; + float2 uv : TEXCOORD0; + + float3 normal : NORMAL; + float4 tangent : TANGENT; +}; + +// DONE: normal/tangent should be half3/4 to cut parameter buffer +// but that will break Nvidia/Adreno. +struct OutputVS +{ + float4 position : SV_Position; + float4 worldPos : TEXCOORD0; + float2 uv : TEXCOORD1; + + half3io normal : NORMAL; + half4io tangent : TANGENT; +}; + +// DONE: color is now half +struct OutputPS +{ + half4io target0 : SV_Target0; +}; + +// Sample normal map, convert to signed, apply tangent-to-world space transform. +half3 CalcPerPixelNormal(float2 texcoord, half3 vertNormal, half3 vertTangent, half bitanSign) +{ + half3 vertBinormal = normalize(cross(vertTangent, vertNormal)) * bitanSign; + half3x3 tangentSpaceToWorldSpace = half3x3(vertTangent, vertBinormal, vertNormal); + + // Compute per-pixel normal. + half4 bumpSample = normalMap.Sample(sampleWrap, texcoord); + half3 bumpNormal = bumpSample.xyz; // normalMap.Sample(sampleWrap, texcoord).xyz; + + // TODO: let snorm format handle, and do z reconstruct + bumpNormal = 2.0h * bumpNormal - 1.0h; + + return mulr(bumpNormal, tangentSpaceToWorldSpace); +} + +// Diffuse lighting calculation, with angle and distance falloff. +half4 CalcLightingColor(float3 lightPos, half3 lightDir, half4 lightColor, float4 falloffs, float3 posWorld, half3 perPixelNormal) +{ + float3 lightToPixelUnNormalized = posWorld - lightPos; + + // Dist falloff = 0 at falloffs.x, 1 at falloffs.x - falloffs.y + float dist = length(lightToPixelUnNormalized); + + half distFalloff = (half)saturate((falloffs.x - dist) / falloffs.y); + + // Normalize from here on. 
+ half3 lightToPixelNormalized = (half3)normalize(lightToPixelUnNormalized); + + // Angle falloff = 0 at falloffs.z, 1 at falloffs.z - falloffs.w + //half3 lightDirHalf = (half3)normalize(lightDir); + half cosAngle = dot(lightToPixelNormalized, lightDir); + half angleFalloff = saturate((cosAngle - (half)falloffs.z) / (half)falloffs.w); + + // Diffuse contribution. + half dotNL = saturate(-dot(lightToPixelNormalized, perPixelNormal)); + + return lightColor * (dotNL * distFalloff * angleFalloff); +} + +// Test how much pixel is in shadow, using 2x2 percentage-closer filtering. +half CalcUnshadowedAmountPCF2x2(float4 posWorld, float4x4 viewProj) +{ + // Compute pixel position in light space. + float4 lightSpacePos = posWorld; + lightSpacePos = mulr(lightSpacePos, viewProj); + + // need to reject before division (assuming revZ, infZ) + if (lightSpacePos.z > lightSpacePos.w) + return 1.0h; + + // near/w for persp, z/1 for ortho + lightSpacePos.xyz /= lightSpacePos.w; + + // Use HW filtering + return (half)shadowMap.SampleCmp(shadowMapSampler, lightSpacePos.xy, lightSpacePos.z); +} + +OutputVS SampleVS(InputVS input) +{ + OutputVS output; + + float4 newPosition = float4(input.position, 1.0); + + newPosition = mulr(newPosition, scene.model); + + output.worldPos = newPosition; + + newPosition = mulr(newPosition, scene.viewProj); + + output.position = newPosition; + output.uv = input.uv; + + // This only works if only uniform scale and invT on normal + output.normal = (half3io)mulr(input.normal, (float3x3)scene.model); + output.tangent.xyz = (half3io)mulr(input.tangent.xyz, (float3x3)scene.model); + output.tangent.w = (halfio)input.tangent.w; + + return output; +} + +OutputPS SamplePS(OutputVS input) +{ + // Compute tangent frame. 
+ half3 normal = normalize((half3)input.normal); + half3 tangent = normalize((half3)input.tangent.xyz); + half bitanSign = (half)input.tangent.w; + + half4 diffuseColor = diffuseMap.Sample(sampleWrap, input.uv); + half3 pixelNormal = CalcPerPixelNormal(input.uv, normal, tangent, bitanSign); + half4 totalLight = (half4)scene.ambientColor; + + for (int i = 0; i < NUM_LIGHTS; i++) + { + LightState light = scene.lights[i]; + half4 lightPass = CalcLightingColor(light.position, normalize((half3)light.direction), (half4)light.color, light.falloff, input.worldPos.xyz, pixelNormal); + + // only single light shadow map + if (i == 0 && scene.sampleShadowMap) + { + lightPass *= CalcUnshadowedAmountPCF2x2(input.worldPos, light.viewProj); + } + totalLight += lightPass; + } + + OutputPS output; + output.target0 = (half4io)(diffuseColor * saturate(totalLight)); + return output; +} + + diff --git a/hlslparser/shaders/ShaderHLSL.h b/hlslparser/shaders/ShaderHLSL.h new file mode 100644 index 00000000..56b74b8b --- /dev/null +++ b/hlslparser/shaders/ShaderHLSL.h @@ -0,0 +1,460 @@ +#ifndef ShaderHLSL_h +#define ShaderHLSL_h + +// glslc doesn't support but DXC does +// so had to add header guard +#ifndef __spirv__ +#pragma once +#endif + +// Don't know why HLSL doesn't support these. The squared forms use dot() so a vector argument reduces to a scalar. +#define min3(x,y,z) min(x, min(y, z)) +#define max3(x,y,z) max(x, max(y, z)) +#define length_squared(x) dot((x), (x)) +#define distance_squared(x,y) dot((x)-(y), (x)-(y)) + +// Use templated type to pass tex + sampler combos +//template +//struct TexSampler +//{ +// T t; +// SamplerState s; +//}; + +// no &* or ctors in HLSL limited C++ +// This means operators cannot overload [+-*/>><<]=. Only builtins work. + +// HLSL2021 adds bitfields, so could define a color. +// They say they are on bw compatible with DX12 releases, but spirv backed should warn. +//struct ColorRGBA { +// uint R : 8; +// uint G : 8; +// uint B : 8; +// uint A : 8; +//}; + +// DX12 support SM6, DX11 support SM5 and prior.
+// But only latest VS2022 supports SM6.6 +// DXC should be translating calls back to Vulkan + +// in HLSL 2021 logical operators can only be used with scalar values. +// new and/or constructs instead of using &&/|| +// bool3 Cond = and(X, Y); +// bool3 Cond2 = or(X, Y); +// int3 Z = select(X, 1, 0); + +// RW/ByteAddressBuffer added SM 5.1 +// RW/StructuredBuffer added SM 5.1 +// ConstantBuffer foo[4] : register(b0) template supports indexing too. +// added in SM 6.1. So targeting DX12 6.2 seems ideal with half support. + +// For float16 operations, denormal numbers must be preserved. +// No atomic operations for float16 are supported. + +// no using, so do typedef +// this is ugly syntax +//typedef int16_t short; +typedef int16_t2 short2; +typedef int16_t3 short3; +typedef int16_t4 short4; + +typedef uint16_t ushort; +typedef uint16_t2 ushort2; +typedef uint16_t3 ushort3; +typedef uint16_t4 ushort4; + +//typedef int64_t long; +typedef int64_t2 long2; +typedef int64_t3 long3; +typedef int64_t4 long4; + +typedef uint64_t ulong; +typedef uint64_t2 ulong2; +typedef uint64_t3 ulong3; +typedef uint64_t4 ulong4; + +// TODO: should matrices be added for long/short? + +//typedef float64_t double; +typedef float64_t2 double2; +typedef float64_t3 double3; +typedef float64_t4 double4; + +typedef float64_t2x2 double2x2; +typedef float64_t3x3 double3x3; +typedef float64_t4x4 double4x4; + + +// Note: no u/char +// Note: add double, but won't work on mobile (Android/MSL). +// also Intel removed fp64 GPU support. Often runs 1/64th speed. +// But may be needed for ray-tracing large worlds. Metal doesn't have double. + +// TODO: add Atomics, more atomic u/long and float in SM 6.6 +// otherwise it's mostly atomic_u/int that is portable. +// Apple Metal 3 added atomic_float.
+ +// 6.6 is cutting edge, want to target 6.2 for now +#define SM66 0 +#if SM66 +// compile to SM6.6 for these +typedef uint8_t4_packed uchar4_packed; +typedef int8_t4_packed char4_packed; + +// signed do sign extend +ushort4 toUshort4(uchar4_packed packed) +{ + return unpack_u8u16(packed); +} +short4 toShort4(char4_packed packed) +{ + return unpack_s8s16(packed); +} +uint4 toUint4(uchar4_packed packed) +{ + return unpack_u8u32(packed); +} +int4 toInt4(char4_packed packed) +{ + return unpack_s8s32(packed); +} + +// Are SM6.6 calls for pack_clamp_u8 using the wrong input type? +// https://github.com/microsoft/DirectXShaderCompiler/issues/5091 +// pack lower 8 +uchar4_packed fromUshort4(ushort4 v) +{ + return pack_u8(v); +} +uchar4_packed fromShort4ClampU(short4 v) +{ + return pack_clamp_u8(v); +} +char4_packed fromShort4(short4 v, bool clamp = true) +{ + return clamp ? pack_clamp_s8(v) : pack_s8(v); +} +uchar4_packed fromUint4(uint4 v) +{ + return pack_u8(v); +} +uchar4_packed fromInt4ClampU(int4 v) +{ + return pack_clamp_u8(v); +} +char4_packed fromInt4(int4 v, bool clamp = true) +{ + return clamp ? pack_clamp_s8(v) : pack_s8(v); +} +#endif + + +// TODO: toggle building shaders for Adreno/Nvidia +#define USE_HALF 1 + +// Only Android Adreno doesn't have fp16 storage, but MSL/desktop has this. +// Can't use min16Float since that is remapped to fp16. + +#define USE_HALF_STORAGE 1 +// The *st storage types follow USE_HALF_STORAGE; USE_HALFIO is not defined until further down. +#if USE_HALF_STORAGE + +typedef half halfst; +typedef half2 half2st; +typedef half3 half3st; +typedef half4 half4st; + +#else + +typedef float halfst; +typedef float2 half2st; +typedef float3 half3st; +typedef float4 half4st; + +#endif + +// Nvidia/Adreno don't support half as inputOutput, but that +// just wastes parameter buffer on tiled architectures. Can +// just use these as casts and types inside input/output.
+ +#define USE_HALFIO USE_HALF + +// This is handled by parser, but syntax highlighting would need this +#if USE_HALFIO + +typedef half halfio; +typedef half2 half2io; +typedef half3 half3io; +typedef half4 half4io; + +#else + +typedef float halfio; +typedef float2 half2io; +typedef float3 half3io; +typedef float4 half4io; + +#endif + +// This has templated elements appended, so typedef doesn't work. +// HLSL doesn't distingush depth/color, but MSL does. These calls combine +// the comparison value in the z or w element. +#define Depth2D Texture2D +#define Depth2DArray Texture2DArray +#define DepthCube TextureCube + + +/* These are now defined as member function intrinsics + +// TODO: fix parsing, so don't have to provide these overrides +// The parser also has to rewrite params on MSL and wrap args. + +//---------- + +float4 Sample(Texture2D t, SamplerState s, float2 texCoord, int2 offset = 0) +{ + return t.Sample(s, texCoord, offset); +} + +float4 Sample(Texture2DArray t, SamplerState s, float3 texCoord, int2 offset = 0) +{ + return t.Sample(s, texCoord, offset); +} + +float4 Sample(Texture3D t, SamplerState s, float3 texCoord, int3 offset = 0) +{ + return t.Sample(s, texCoord, offset); +} + +// no offset +float4 Sample(TextureCube t, SamplerState s, float3 texCoord) +{ + return t.Sample(s, texCoord); +} + +float4 Sample(TextureCubeArray t, SamplerState s, float4 texCoord) +{ + return t.Sample(s, texCoord); +} + + +//---------- + +// Can use these inside vertex shader +float4 SampleLevel(Texture2D t, SamplerState s, float4 texCoord, int2 offset = 0) +{ + return t.SampleLevel(s, texCoord.xy, texCoord.w, offset); +} +float4 SampleLevel(Texture2DArray t, SamplerState s, float4 texCoord, int2 offset = 0) +{ + return t.SampleLevel(s, texCoord.xyz, texCoord.w, offset); +} + +float4 SampleLevel(Texture3D t, SamplerState s, float4 texCoord, int3 offset = 0) +{ + return t.SampleLevel(s, texCoord.xyz, texCoord.w, offset); +} + +// no offset support +float4 
SampleLevel(TextureCube t, SamplerState s, float4 texCoord) +{ + return t.SampleLevel(s, texCoord.xyz, texCoord.w); +} + +// this would need more args for level +//float4 SampleLevel(TextureCubeArray t, SamplerState s, float4 texCoord) +//{ +// return t.SampleLevel(s, texCoord.xyz, texCoord.w); +//} + + +//---------- + +float4 SampleBias(Texture2D t, SamplerState s, float4 texCoord) +{ + return t.SampleBias(s, texCoord.xy, texCoord.w); +} + +float4 SampleBias(TextureCube t, SamplerState s, float4 texCoord) +{ + return t.SampleBias(s, texCoord.xyz, texCoord.w); +} + +//---------- + +float4 SampleGrad(Texture2D t, SamplerState s, float2 texCoord, float2 gradx, float2 grady) +{ + return t.SampleGrad(s, texCoord.xy, gradx, grady); +} + +//---------- + + + +// can just use the default for Texture2D +//float4 Sample(Texture2D t, SamplerState s, float2 texCoord, int2 offset = 0) +//{ +// return t.Sample(s, texCoord.xy, offset); +//} + +// For persp shadows, remember to divide z = z/w before calling, or w = z/w on cube +float SampleCmp(Texture2D t, SamplerComparisonState s, float4 texCoord, int2 offset = 0) +{ + return t.SampleCmp(s, texCoord.xy, texCoord.z, offset); +} + +float4 GatherCmp(Texture2D t, SamplerComparisonState s, float4 texCoord, int2 offset = 0) +{ + return t.GatherCmp(s, texCoord.xy, texCoord.z, offset); +} + +//---------- + +// Use these in VS. Why doesn't bilinear work in VS? +// TextureLevel should work in VS, since lod is specific. 
+ + +// can also use this stype +// uint2 pos_xy = uint2( 0, 10 ); +// texelColor = tex0[ pos_xy ] ; + + +// TODO: these also take offsets +float4 Load(Texture2D t, int2 texCoord, int lod = 0, int2 offset = 0) +{ + return t.Load(int3(texCoord, lod), offset); +} + +float4 Load(Texture3D t, int3 texCoord, int lod = 0, int3 offset = 0) +{ + return t.Load(int4(texCoord, lod), offset); +} + +float4 Load(Texture2DArray t, int3 texCoord, int lod = 0, int2 offset = 0) +{ + return t.Load(int4(texCoord, lod), offset); +} + +// no support in HLSL +//float4 Load(TextureCube t, int3 texCoord) +//{ +// return t.Load(texCoord); +//} +// +//float4 Load(TextureCubeArray t, int4 texCoord) +//{ +// return t.Load(texCoord); +//} + +// this doesn't use SamplerState, raw load, not sampleIndex not lod +float4 Load(Texture2DMS t, int2 texCoord, int sample, int2 offset = 0) +{ + return t.Load(texCoord, sample, offset); +} + + +//---------- + +// gather only works on mip0 +float4 GatherRed(Texture2D t, SamplerState s, float2 texCoord, int2 offset=0) +{ + return t.GatherRed(s, texCoord, offset); +} + +float4 GatherGreen(Texture2D t, SamplerState s, float2 texCoord, int2 offset=0) +{ + return t.GatherGreen(s, texCoord, offset); +} + +float4 GatherBlue(Texture2D t, SamplerState s, float2 texCoord, int2 offset=0) +{ + return t.GatherBlue(s, texCoord, offset); +} + +float4 GatherAlpha(Texture2D t, SamplerState s, float2 texCoord, int2 offset=0) +{ + return t.GatherAlpha(s, texCoord, offset); +} + + + +#if USE_HALF + +// Note: HLSL to SPIRV doesn't support half Texture types, so must cast from float4 +// but MSL and HLSL to DXIL can use half Texture type. +half4 SampleH(Texture2D t, SamplerState s, float2 texCoord, int2 offset = 0) +{ + return (half4)t.Sample(s, texCoord, offset); +} + +half4 SampleLevelH(Texture2D t, SamplerState s, float4 texCoordMip, int2 offset = 0) +{ + return (half4)t.SampleLevel(s, texCoordMip.xy, texCoordMip.w, offset); +} + +// offset? 
+half4 SampleBiasH(Texture2D t, SamplerState s, float4 texCoordBias) +{ + return (half4)t.SampleBias(s, texCoordBias.xy, texCoordBias.w); +} + +#else + +// use all float4 +#define SampleH Sample +#define SampleLevelH SampleLevel +#define SampleBiasH SampleBias + +#endif + + */ + +// There are 2 variants of GetDimensions, one that takes a mipLevel input +// and returns params for that, and one that returns mip0. + +// TODO: these should be types, but by leaving off type, they apply to all types. +int2 GetDimensions(Texture2D t) +{ + int2 size; + t.GetDimensions(size.x, size.y); + return size; +} + +int3 GetDimensions(Texture3D t) +{ + int3 size; + t.GetDimensions(size.x, size.y, size.z); + return size; +} + +int2 GetDimensions(TextureCube t) +{ + int2 size; + t.GetDimensions(size.x, size.y); // sizexsize + return size; +} + +int3 GetDimensions(TextureCubeArray t) +{ + int3 size; + t.GetDimensions(size.x, size.y, size.z); // sizexsize + return size; +} + +int3 GetDimensions(Texture2DArray t) +{ + int3 size; + t.GetDimensions(size.x, size.y, size.z); + return size; +} + +int2 GetDimensions(Texture2DMS t) +{ + int3 size; + t.GetDimensions(size.x, size.y, size.z); + return size.xy; +} + + +#endif // ShaderHLSL_h + diff --git a/hlslparser/shaders/ShaderMSL.h b/hlslparser/shaders/ShaderMSL.h new file mode 100644 index 00000000..290a9532 --- /dev/null +++ b/hlslparser/shaders/ShaderMSL.h @@ -0,0 +1,467 @@ +#ifndef ShaderMSL_h +#define ShaderMSL_h + +// For some reason the air link thinks all symbols in this header are duplicates +// unles I mark them all as inline. So do that for now. + +// glslc doesn't support, but this header is metal only +#pragma once + +// TODO: support function_constants in MSL, is there HLSL equivalent yet +// [[function_constant(index)]] +// +// Here's someone trying to solve for HLSL by modding DXIL from DXC +// https://twitter.com/RandomPedroJ/status/1532725156623286272 +// Vulkan doesn't run on older Intel, but DX12 does. 
+ +// Header can be pulled into regular code to build +// Taken from KramShaders.h + +#ifndef __METAL_VERSION__ +#import +#else +#include +#include +#endif + +#import + +#ifdef __METAL_VERSION__ +#define NS_ENUM(_type, _name) \ + enum _name : _type _name; \ + enum _name : _type +#endif + +// This isn't standard enum convention where enum starts with enum name +// but this allows semantic passthrough of parser. +typedef NS_ENUM(int32_t, VA) { + POSITION = 0, + + NORMAL = 2, + TANGENT = 3, + BITANGENT = 4, + + BLENDINDICES = 5, + BLENDWEIGHT = 6, + + COLOR0 = 7, + + TEXCOORD0 = 8, + TEXCOORD1 = 9, + TEXCOORD2 = 10, + TEXCOORD3 = 11, + TEXCOORD4 = 12, + TEXCOORD5 = 13, + TEXCOORD6 = 14, + TEXCOORD7 = 15, +}; + +// May want to only do using in the .metal files themselves. +using namespace metal; +using namespace simd; + +// can safely use half on Metal +#define USE_HALF 1 + +#define USE_HALFIO USE_HALF + +// these aren't really needed, HLSLParser has options that replace this +// in the output code. +typedef half halfio; +typedef half2 half2io; +typedef half3 half3io; +typedef half4 half4io; + +typedef half halfst; +typedef half2 half2st; +typedef half3 half3st; +typedef half4 half4st; + +// #define mad precise::fma + + +inline float mad(float a, float b, float c) { + return a * b + c; +} +inline float2 mad(float2 a, float2 b, float2 c) { + return a * b + c; +} +inline float3 mad(float3 a, float3 b, float3 c) { + return a * b + c; +} +inline float4 mad(float4 a, float4 b, float4 c) { + return a * b + c; +} + + +// DirectX couldn't simply use operator * in all these years +// so have to use a function call mul.
+ +// Might be easier to use * instead +inline float2x2 mul(float a, float2x2 m) { return a * m; } +inline float3x3 mul(float a, float3x3 m) { return a * m; } +inline float4x4 mul(float a, float4x4 m) { return a * m; } + +inline float2x2 mul(float2x2 m, float a) { return a * m; } +inline float3x3 mul(float3x3 m, float a) { return a * m; } +inline float4x4 mul(float4x4 m, float a) { return a * m; } + +inline float2 mul(float2 a, float2x2 m) { return a * m; } +inline float3 mul(float3 a, float3x3 m) { return a * m; } +inline float4 mul(float4 a, float4x4 m) { return a * m; } + +inline float2 mul(float2x2 m, float2 a) { return m * a; } +inline float3 mul(float3x3 m, float3 a) { return m * a; } +inline float4 mul(float4x4 m, float4 a) { return m * a; } + +//float3 mul(float4 a, float3x4 m) { return a * m; } // why no macro ? +//float2 mul(float4 a, float2x4 m) { return a * m; } + +#if USE_HALF + +inline half mad(half a, half b, half c) { + return a * b + c; +} +inline half2 mad(half2 a, half2 b, half2 c) { + return a * b + c; +} +inline half3 mad(half3 a, half3 b, half3 c) { + return a * b + c; +} +inline half4 mad(half4 a, half4 b, half4 c) { + return a * b + c; +} + + +inline half2x2 mul(half a, half2x2 m) { return a * m; } +inline half3x3 mul(half a, half3x3 m) { return a * m; } +inline half4x4 mul(half a, half4x4 m) { return a * m; } + +inline half2x2 mul(half2x2 m, half a) { return a * m; } +inline half3x3 mul(half3x3 m, half a) { return a * m; } +inline half4x4 mul(half4x4 m, half a) { return a * m; } + +inline half2 mul(half2 a, half2x2 m) { return a * m; } +inline half3 mul(half3 a, half3x3 m) { return a * m; } +inline half4 mul(half4 a, half4x4 m) { return a * m; } + +inline half2 mul(half2x2 m, half2 a) { return m * a; } +inline half3 mul(half3x3 m, half3 a) { return m * a; } +inline half4 mul(half4x4 m, half4 a) { return m * a; } + +inline float3x3 tofloat3x3(float4x4 m) { + return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); +} +inline half3x3 tohalf3x3(half4x4 
m) { + return half3x3(m[0].xyz, m[1].xyz, m[2].xyz); +} + + +#endif + +// TODO: parser could replace these intrinsic names in metal +#define lerp mix +#define rcp recip +#define ddx dfdx +#define ddy dfdy +#define frac fract +#define isinfinite isinf +#define degrees(x) ((x) / (M_PI/180.0)) +#define radians(x) ((x) * (M_PI/180.0)) +#define reversebits(x) reverse_bits(x) + +// bit ops +#define countbits(x) popcount(x) +#define firstbithigh(x) clz(x) +#define firstbitlow(x) ctz(x) +// HLSL clip discards the pixel when any component of x is negative +#define clip(x) if (any((x) < 0.0)) discard_fragment() + +// Use templated type to pass tex + sampler combos +// Then parser would have to handle templates. Ick. +//template +//struct TexSampler +//{ +// T t; +// sampler s; +//}; + +//--------- + +/* +// gather only works on mip0 +inline float4 GatherRed(texture2d t, sampler s, float2 texCoord, int2 offset=0) { + return t.gather(s, texCoord, offset, component::x); +} + +inline float4 GatherGreen(texture2d t, sampler s, float2 texCoord, int2 offset=0) { + return t.gather(s, texCoord, offset, component::y); +} + +inline float4 GatherBlue(texture2d t, sampler s, float2 texCoord, int2 offset=0) { + return t.gather(s, texCoord, offset, component::z); +} + +inline float4 GatherAlpha(texture2d t, sampler s, float2 texCoord, int2 offset=0) { + return t.gather(s, texCoord, offset, component::w); +} + +//--------- + +inline float4 SampleGrad(texture2d t, sampler s, float2 texCoord, float2 gradx, float2 grady) { + return t.sample(s, texCoord.xy, gradient2d(gradx, grady)); +} + +//--------- + +#if USE_HALF + +inline half4 SampleH(texture2d t, sampler s, float2 texCoord) { + return t.sample(s, texCoord); +} + +inline half4 SampleLevelH(texture2d t, sampler s, float4 texCoordMip) { + return t.sample(s, texCoordMip.xy, level(texCoordMip.w)); +} + +inline half4 SampleBiasH(texture2d t, sampler s, float4 texCoordBias) { + return t.sample(s, texCoordBias.xy, bias(texCoordBias.w)); +} + +#else + +#define SampleH Sample +#define SampleLevelH SampleLevel
+#define SampleBiasH SampleBias + +#endif + + + +inline float4 SampleLevel(texture2d t, sampler s, float4 texCoordMip) { + return t.sample(s, texCoordMip.xy, level(texCoordMip.w)); +} + +inline float4 SampleLevel(texturecube t, sampler s, float4 texCoordMip) { + return t.sample(s, texCoordMip.xyz, level(texCoordMip.w)); +} + +inline float4 SampleLevel(texture3d t, sampler s, float4 texCoordMip) { + return t.sample(s, texCoordMip.xyz, level(texCoordMip.w)); +} + +// TODO: may need to add to intrinsics +//inline float4 SampleLevel(texture2d_array t, sampler s, float4 texCoordMip) { +// return t.sample(s, texCoordMip.xyz, level(texCoordMip.w)); +//} +//inline float4 SampleLevel(texturecube_array t, sampler s, float4 texCoordMip) { +// return t.sample(s, texCoordMip.xyz, level(texCoordMip.w)); +//} + +// ---- + +inline float4 SampleBias(texturecube t, sampler s, float4 texCoordBias) { + return t.sample(s, texCoordBias.xyz, bias(texCoordBias.w)); +} + +inline float4 SampleBias(texture2d t, sampler s, float4 texCoordBias) { + return t.sample(s, texCoordBias.xy, bias(texCoordBias.w)); +} + +//------ + +// see if some of these have offset +inline float4 Load(texture2d t, int2 texCoord, int lod = 0) +{ + return t.read((uint2)texCoord, (uint)lod); +} + +inline float4 Load(texture3d t, int3 texCoord, int lod = 0) +{ + return t.read((uint3)texCoord, (uint)lod); +} + +inline float4 Load(texture2d_array t, int3 texCoord, int lod = 0) +{ + return t.read((uint2)texCoord.xy, (uint)texCoord.z, (uint)lod); +} + +// no HLSL equivalent, so don't define for MSL. Maybe it's just offset that is missing. 
+//inline float4 Load(texturecube t, int3 texCoord, int lod = 0) +//{ +// uv, face, lod, offset +// return t.read((uint2)texCoord.xy, (uint)texCoord.z, (uint2)lod); +//} +// +//inline float4 Load(texturecube_array t, int4 texCoord, int lod = 0) +//{ +// return t.read((uint2)texCoord.xy, (uint)texCoord.z, (uint)texcoord.w, (uint)lod); +//} + +// this doesn't use SamplerState, raw load +inline float4 Load(texture2d_ms t, int2 texCoord, int sample) { + return t.read((uint2)texCoord, (uint)sample); +} + +// also write call (Store in HLSL) + +// ---- + + +inline float4 Sample(texture2d_array t, sampler s, float3 texCoord, int2 offset=0) { + return t.sample(s, texCoord.xy, uint(texCoord.z), offset); +} +inline float4 Sample(texture2d t, sampler s, float2 texCoord, int2 offset=0) { + return t.sample(s, texCoord, offset); +} +inline float4 Sample(texture3d t, sampler s, float3 texCoord, int3 offset=0) { + return t.sample(s, texCoord, offset); +} +inline float4 Sample(texturecube t, sampler s, float3 texCoord) { + return t.sample(s, texCoord); +} +inline float4 Sample(texturecube_array t, sampler s, float4 texCoord) { + return t.sample(s, texCoord.xyz, uint(texCoord.w)); +} +//---------- + +inline float4 Sample(depth2d t, sampler s, float2 texCoord, int2 offset = 0) +{ + return t.sample(s, texCoord.xy, offset); +} + + +// For persp shadows, remember to divide z = z/w before calling, or w = z/w on cube +inline float SampleCmp(depth2d t, sampler s, float4 texCompareCoord, int2 offset = 0) +{ + return t.sample_compare(s, texCompareCoord.xy, texCompareCoord.z, offset); +} + +inline float4 GatherCmp(depth2d t, sampler s, float4 texCompareCoord, int2 offset = 0) +{ + return t.gather_compare(s, texCompareCoord.xy, texCompareCoord.z, offset); +} + */ + +// Trying to override member functions is just not simple like +// raw functions are. And macros are insufficient when the args +// differ across the same named call. Also macros can't handle +// defaults. 
+ + +// TODO: some have optional offsets, but not cube/cubearray in HLSL +// TODO: array lookup on Sample is uint?, +#define Sample(s, uv) sample(s, uv) +#define SampleLevel(s, uv, level_) sample(s, uv, level(level_)) +#define SampleBias(s, uv, bias_) sample(s, uv, bias(bias_)) + +// only valid for Texture2D, there is gradient3d +#define SampleGrad(s, uv, gradx, grady) sample(s, uv, gradient2d(gradx, grady)) + +#define SampleCmp(s, uv, val) sample_compare(s, uv, val) +#define GatherCmp(s, uv, val) gather_compare(s, uv, val) + +// TODO: must pass uint, not int to read unlike HLSL +// more complex with face, array, lod +#define Load(uv, sample) read(uv, sample) +// gathers forward the sampler argument and always use a zero offset +#define GatherRed(s, uv) gather(s, uv, (int2)0, component::x) +#define GatherGreen(s, uv) gather(s, uv, (int2)0, component::y) +#define GatherBlue(s, uv) gather(s, uv, (int2)0, component::z) +#define GatherAlpha(s, uv) gather(s, uv, (int2)0, component::w) + +// ---- + +// get_num_mip_levels, get_array_size +// get_width/height/depth(lod) +// TODO: need half versions +inline int2 GetDimensions(texture2d t) +{ + int2 size(t.get_width(), t.get_height()); + return size; +} + +inline int3 GetDimensions(texture3d t) +{ + int3 size(t.get_width(), t.get_height(), t.get_depth()); + return size; +} + +inline int2 GetDimensions(texturecube t) +{ + int2 size(t.get_width(), t.get_width()); + return size; +} + +inline int3 GetDimensions(texturecube_array t) +{ + int3 size(t.get_width(), t.get_width(), t.get_array_size()); + return size; +} + +inline int3 GetDimensions(texture2d_array t) +{ + int3 size(t.get_width(), t.get_height(), t.get_array_size()); + return size; +} + +inline int2 GetDimensions(texture2d_ms t) +{ + int2 size(t.get_width(), t.get_height()); + return size; +} + +// For textures, T can be half, float, short, ushort, int, or uint. +// For depth texture types, T must be float.
+// +// texture2d_ms, texture2d_msaa_array +// +// depth2d, _ms, _ms_array, _array, +// depthcube, depthcube_array + +/// TODO: add sparse_sample options +//template +//struct sparse_color { +//public: +// constexpr sparse_color(T value, bool resident) thread; +// // Indicates whether all memory addressed to retrieve the value was +//mapped. +// constexpr bool resident() const thread; +// +// // Retrieve the color value. +// constexpr T const value() const thread; +//}; +// sparse_sample(s, coord, offset), sparse_gather, sparse_sample_compare, sparse_gather_compare +// min_lod_clamp(float) option to sample + +// gradientcube, gradient3d, min_lod_clamp(float lod), +// bias(float value), level(float lod) +// uint get_num_samples() const +// +// can have textures in structs, would help pass tex + sampler +// but already had that before in hlslparser. Could +// bring that back, but have those built by caller. +// could code rewrite calls to pass tex/sampler into them +// and then don't need the struct wrapper in MSL. That +// severly limits sharing structs, functions. The structs +// don't really need to be in there. +// +// struct Foo { +// texture2d a [[texture(0)]]; +// depth2d b [[texture(1)]]; +// }; +// + + +// handle access specifier RWTexture mods the template arg +// texture2d a; +// on iOS, Writable textures aren’t supported within an argument buffer. +// 31/96/50000 buffers+textures for A7, A11, A13/Tier2 +// 16/16/2048 samplers +// 64/128/16 on Tier1 macOS, see above for Tier2 (discrete gpu) + +// MSL 2.3 has function pointers +// MSL 2.4 has compute recursion + +#endif + diff --git a/hlslparser/shaders/Skinning.hlsl b/hlslparser/shaders/Skinning.hlsl new file mode 100644 index 00000000..003d76f4 --- /dev/null +++ b/hlslparser/shaders/Skinning.hlsl @@ -0,0 +1,233 @@ + +#include "ShaderHLSL.h" + +// TODO: syntax highlighting as Metal doesn't work +// This isn't including header, but that doesn't seem to fix either. +// Need HLSL plugin for Xcode. 
files have to be a part of project +// to get syntax highlight but even that doesn't work. And ref folders +// also don't work since the files aren't part of the project. +// +// https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/SPIR-V.rst + +// setup variants +// HLSL: specialization constants marked at beginning +// [[vk::constant_id(0)]] const int specConstInt = 1; +// [[vk::constant_id(1)]] const bool specConstBool = true; +// +// MSL: function constants marked at end +// constant bool a [[function_constant(0)]]; +// constant int a [[function_constant(1)]]; // 0.. 64K-1 + +// This is for tile shaders +// subpass input, and SubpassLoad() calls +// [[vk::input_attachment_index(i)]] SubpassInput input; +// class SubpassInput { T SubpassLoad(); }; +// class SubpassInputMS { T SubpassLoad(int sampleIndex); }; + +// push constants (DONE) +// [[vk::push_constant]] + +// descriptors and arg buffers +// [[vk::binding(X[, Y])]] and [[vk::counter_binding(X)]] + +// tagging the format of buffers/textures, since HLSL can't represent +// [[vk::image_format("rgba8")]] +// RWBuffer Buf; +// +// [[vk::image_format("rg16f")]] +// RWTexture2D Tex; + +// structure buffer only supports 2/4B access, ByteAddressBuffer only 4B increments +// #ifdef __spirv__ +// [[vk::binding(X, Y), vk::counter_binding(Z)]] +// #endif +// StructuredBuffer ssbo; + +// No u/int8_t or u/char in HLSL. +// There is int64_t/uint64_t in MSL. +// D3DCOLORtoUBYTE4: Decodes a D3DCOLOR packed DWORD to a float4. +// Note the swizzle, and I don't want an int4. I need to encode. +// This is achieved by performing int4(input.zyxw * 255.002) using SPIR-V OpVectorShuffle, OpVectorTimesScalar, and OpConvertFToS, respectively. + +// Have uint16_t/int16_t support in 6.2. +// +// cbuffer are std140, and ssbo are std430 arrangement. Affects arrays. +// or -fvk-use-dx-layout vs. -fvk-use-gl-layout vs. -fvk-use-scalar-layout.
+// Scalar layout rules introduced via VK_EXT_scalar_block_layout, which basically aligns +// all aggregrate types according to their elements' natural alignment. +// They can be enabled by -fvk-use-scalar-layout. +// see table. Vulkan can't use DX layout yet. +// +// This is 6.1 change so constants can be array indexed +// And it also reduces the quantity of globals throughout and ties to MSL better. +// cbuffer vs. ConstantBuffer myCBuffer[10]; + +// struct VSInput { +// [[vk::location(0)]] float4 pos : POSITION; +// [[vk::location(1)]] float3 norm : NORMAL; +// }; + +// 6.2 adds templated load, so can +//ByteAddressBuffer buffer; +// +//float f1 = buffer.Load(idx); +//half2 h2 = buffer.Load(idx); +//uint16_t4 i4 = buffer.Load(idx); + +// MSL rule; +// If a vertex function writes to one or more buffers or textures, its return type must be void. + +// no preprocessor to do this yet, so have to add functions +// can't seem to have overloads like this with same name +inline float4 mulr(float4 v, float4x4 m) { return mul(m,v); } +inline float3 mulr(float3 v, float3x3 m) { return mul(m,v); } +inline half3 mulr(half3 v, half3x3 m) { return mul(m,v); } + +struct InputVS +{ + float4 position : SV_Position; + float3 normal : NORMAL; + float2 uv : TEXCOORD0; + float4 blendWeights : BLENDWEIGHT; + uint4 blendIndices : BLENDINDICES; +}; + +// these were just tests +//short4 testShort : TANGENT; +//ushort4 testUShort : BITANGENT; + +struct OutputVS +{ + float4 position : SV_Position; + halfio diffuse : COLOR; + float2 uv : TEXCOORD0; +}; + +struct OutputPS +{ + half4io color : SV_Target0; +}; + +static const uint kMaxSkinTfms = 256; + +// Note: SkinTfms makes MSL air shader 60K bigger at 256, +// so may want to limit large hardcoded arrays. It's 61K @256, and 7.8K @1. +// Doesn't seem to affect spriv at 4K. +// GameShaders.metallib is 33K, and Zip of Spirv is 6K. 
+struct UniformsStruct +{ + float4x4 skinTfms[kMaxSkinTfms]; + + half3st lightDir; + float4x4 worldToClipTfm; +}; +ConstantBuffer uniforms : register(b0); + +// can have 14x 64K limit to each cbuffer, 128 tbuffers, + +// Structured buffers +//struct StructuredStruct +//{ +// half3st lightDir; +// float4x4 worldToClipTfm; +//}; +// +//StructuredBuffer bufferTest0 : register(t2); + + +Texture2D tex : register(t1); +SamplerState samplerClamp : register(s0); + +float4x4 DoSkinTfm(/*UniformsStruct uniforms,*/ float4 blendWeights, uint4 blendIndices) +{ + // weight the transforms, could use half4x4 and 101010A2 for weights + float4x4 skinTfm = blendWeights[0] * uniforms.skinTfms[blendIndices[0]]; + + for (uint i = 1; i < 4; ++i) + { + skinTfm += blendWeights[i] * uniforms.skinTfms[blendIndices[i]]; + } + + return skinTfm; +} + +// TODO: this isn't working, wanted to share OutputVS, so left out of that +// and moved to SkinningVS as output. But something thinks out is type. +// , out float pointSize : PSIZE + +// TODO: fix ability to comment out inputs +OutputVS SkinningVS(InputVS input, + uint vertexBase : BASEVERTEX, + uint instanceBase : BASEINSTANCE, + uint instanceID : SV_InstanceID, + uint vertexID : SV_VertexID +) +{ + OutputVS output; + + // TODO: this needs to declare array param as constant for + // MSL function call. Pointers can't be missing working space. 
+ + // this is just to use these + //uint vertexNum = vertexID; + //uint instanceNum = instanceID; + + uint vertexNum = vertexBase + vertexID; + uint instanceNum = instanceBase + instanceID; + + instanceNum += vertexNum; + + // not using above + + // float4x4 skinTfm = uniforms.skinTfms[ instanceNum ]; + + float4x4 skinTfm = DoSkinTfm(input.blendWeights, input.blendIndices); + + // Skin to world space + float3 normal = mulr(input.normal, (float3x3)skinTfm); + normal = mulr(normal, (float3x3)uniforms.worldToClipTfm); + + // Output stuff + float4 worldPos = mulr(input.position, skinTfm); + output.position = mulr(worldPos, uniforms.worldToClipTfm); + + output.diffuse = (halfio)dot((half3)uniforms.lightDir, (half3)normal); + + // test structured buffer + // StructuredStruct item = bufferTest0[0]; + //output.diffuse *= item.lightDir.x; + + // test the operators +// output.diffuse *= output.diffuse; +// output.diffuse += output.diffuse; +// output.diffuse -= output.diffuse; +// output.diffuse /= output.diffuse; + + output.uv = input.uv; + + // only for Vulkan/MSL, DX12 can't control per vertex shader + //pointSize = 1; + + return output; +} + + + +// Note: don't write as void SkinningPS(VS_OUTPUT input, out PS_OUTPUT output) +// this is worse MSL codegen. + +// TODO: SV_Position differs from Vulkan/MSL in that pos.w = w and not 1/w like gl_FragCoord and [[position]]. +// DXC has a setting to invert w. 
+ +OutputPS SkinningPS(OutputVS input, + bool isFrontFace: SV_IsFrontFace + ) +{ + half4 color = tex.Sample(samplerClamp, input.uv); + color.rgb = min3(color.r, color.g, color.b); + color.rgb *= (half)input.diffuse; + + OutputPS output; + output.color = (half4io)color; + return output; +} diff --git a/hlslparser/src/CodeWriter.cpp b/hlslparser/src/CodeWriter.cpp new file mode 100644 index 00000000..c8af3706 --- /dev/null +++ b/hlslparser/src/CodeWriter.cpp @@ -0,0 +1,122 @@ +//============================================================================= +// +// Render/CodeWriter.cpp +// +// Created by Max McGuire (max@unknownworlds.com) +// Copyright (c) 2013, Unknown Worlds Entertainment, Inc. +// +//============================================================================= + +#include "CodeWriter.h" + +#include + +#include "Engine.h" + +namespace M4 { +CodeWriter::CodeWriter() +{ + m_currentLine = 1; + m_currentFileName = NULL; + m_spacesPerIndent = 4; + m_writeFileLine = false; +} + +void CodeWriter::BeginLine(int indent, const char* fileName, int lineNumber) +{ + // probably missing an EndLine + ASSERT(m_currentIndent == 0); + + if (m_writeFileLine) { + bool outputLine = false; + bool outputFile = false; + + // Output a line number pragma if necessary. + if (fileName != NULL && m_currentFileName != fileName) { + m_currentFileName = fileName; + fileName = m_currentFileName; + outputFile = true; + } + if (lineNumber != -1 && m_currentLine != lineNumber) { + m_currentLine = lineNumber; + outputLine = true; + } + + // if previous filename is same, only output line + if (outputFile) { + String_Printf(m_buffer, "#line %d \"%s\"\n", lineNumber, fileName); + } + else if (outputLine) { + String_Printf(m_buffer, "#line %d\n", lineNumber); + } + } + + // Handle the indentation. 
+ if (indent) + Write("%*s", indent * m_spacesPerIndent, ""); + + m_currentIndent = indent; +} + +int CodeWriter::EndLine(const char* text) +{ + if (text != NULL) { + m_buffer += text; + } + m_buffer += "\n"; + ++m_currentLine; + + // so can EndLine/BeginLine + int indent = m_currentIndent; + m_currentIndent = 0; + return indent; +} + +void CodeWriter::Write(const char* format, ...) +{ + va_list args; + va_start(args, format); + int result = String_PrintfArgList(m_buffer, format, args); + ASSERT(result != -1); + va_end(args); +} + +void CodeWriter::WriteLine(int indent, const char* format, ...) +{ + if (indent) + Write("%*s", indent * m_spacesPerIndent, ""); + + va_list args; + va_start(args, format); + int result = String_PrintfArgList(m_buffer, format, args); + ASSERT(result != -1); + va_end(args); + + EndLine(); +} + +void CodeWriter::WriteLineTagged(int indent, const char* fileName, int lineNumber, const char* format, ...) +{ + // TODO: this should make sure that line isn't already Begu + BeginLine(indent, fileName, lineNumber); + + va_list args; + va_start(args, format); + int result = String_PrintfArgList(m_buffer, format, args); + ASSERT(result != -1); + va_end(args); + + EndLine(); +} + +const char* CodeWriter::GetResult() const +{ + return m_buffer.c_str(); +} + +void CodeWriter::Reset() +{ + m_buffer.clear(); +} + +} //namespace M4 diff --git a/hlslparser/src/CodeWriter.h b/hlslparser/src/CodeWriter.h new file mode 100644 index 00000000..5454fd71 --- /dev/null +++ b/hlslparser/src/CodeWriter.h @@ -0,0 +1,50 @@ +//============================================================================= +// +// Render/CodeWriter.h +// +// Created by Max McGuire (max@unknownworlds.com) +// Copyright (c) 2013, Unknown Worlds Entertainment, Inc. 
// Engine/String.cpp

// Copies b into str, truncating to at most size - 1 characters and always
// null-terminating. A size of 0 leaves str untouched.
// Written out by hand so it does not depend on strlcpy (not available in
// every libc) or on the WIN32 macro being defined.
void String_Copy(char* str, const char* b, uint32_t size)
{
    if (size == 0)
        return;

    size_t len = strlen(b);
    if (len >= size)
        len = size - 1;

    memcpy(str, b, len);
    str[len] = 0;
}

// True when c occurs in str.
bool String_HasChar(const char* str, char c)
{
    return strchr(str, c) != NULL;
}

// True when search occurs in str.
bool String_HasString(const char* str, const char* search)
{
    return strstr(str, search) != NULL;
}

// Case-sensitive equality; two NULLs are equal, one NULL is not.
bool String_Equal(const char* a, const char* b)
{
    if (a == b) return true;
    if (a == NULL || b == NULL) return false;
    return strcmp(a, b) == 0;
}

// Case-insensitive equality; two NULLs are equal, one NULL is not.
bool String_EqualNoCase(const char* a, const char* b)
{
    if (a == b) return true;
    if (a == NULL || b == NULL) return false;
#if _MSC_VER
    return _stricmp(a, b) == 0;
#else
    return strcasecmp(a, b) == 0;
#endif
}

// Appends the formatted text to buffer and returns the number of characters
// appended. This version doesn't truncate and is simpler.
// Returns 0 (and appends nothing) if vsnprintf reports an error.
int String_PrintfArgList(std::string& buffer, const char* format, va_list args)
{
    // fast path: nothing to substitute
    if (!String_HasChar(format, '%')) {
        buffer += format;
        return (int)strlen(format);
    }

    // fast path: the whole format is a single string argument
    if (String_Equal(format, "%s")) {
        va_list tmp;
        va_copy(tmp, args);
        const char* text = va_arg(tmp, const char*);
        va_end(tmp);

        buffer += text;
        return (int)strlen(text);
    }

    // A va_list is indeterminate once vsnprintf has consumed it, so the
    // measuring pass and the writing pass each get their own copy.
    va_list measure;
    va_copy(measure, args);
    int len = vsnprintf(nullptr, 0, format, measure);
    va_end(measure);

    if (len < 0)
        return 0;

    size_t bufferLength = buffer.length();
    buffer.resize(bufferLength + len);

    va_list emit;
    va_copy(emit, args);
    // len + 1 lets vsnprintf put its terminator on the string's own terminator slot
    vsnprintf(&buffer[bufferLength], (size_t)len + 1, format, emit);
    va_end(emit);

    return len;
}

// Formats into a fixed buffer of `size` bytes. This version truncates but
// works on stack. Returns the formatted length, or -1 when formatting failed
// or the text did not fit (the buffer then holds the truncated text).
int String_PrintfArgList(char* buffer, int size, const char* format, va_list args)
{
    // nothing can fit; also keeps a negative size from turning into a huge unsigned one
    if (size <= 0)
        return -1;

    int n;

    if (!String_HasChar(format, '%')) {
        String_Copy(buffer, format, (uint32_t)size);

        // truncation or not
        n = (int)strlen(format);
    }
    else if (String_Equal(format, "%s")) {
        va_list tmp;
        va_copy(tmp, args);
        const char* text = va_arg(tmp, const char*);
        va_end(tmp);

        n = (int)strlen(text);

        // truncation
        String_Copy(buffer, text, (uint32_t)size);
    }
    else {
        va_list tmp;
        va_copy(tmp, args);

        // truncation
        // vsnprintf returns a negative value on failure
        n = vsnprintf(buffer, (size_t)size, format, tmp);
        va_end(tmp);
    }

    if (n < 0 || (n + 1) > size)
        return -1;

    return n;
}

// Variadic front end for the std::string overload.
int String_Printf(std::string& buffer, const char* format, ...)
{
    va_list args;
    va_start(args, format);
    int n = String_PrintfArgList(buffer, format, args);
    va_end(args);

    return n;
}

// Variadic front end for the fixed-buffer overload.
int String_Printf(char* buffer, int size, const char* format, ...)
{
    va_list args;
    va_start(args, format);
    int n = String_PrintfArgList(buffer, size, format, args);
    va_end(args);

    return n;
}

// Formats value with six digits after the decimal point.
int String_FormatFloat(char* buffer, int size, float value)
{
    return String_Printf(buffer, size, "%.6f", value);
}

double String_ToDouble(const char* str, char** endptr)
{
    return strtod(str, endptr);
}

float String_ToFloat(const char* str, char** endptr)
{
    return strtof(str, endptr);
}

static const int kBase10 = 10;
static const int kBase16 = 16;

int32_t String_ToIntHex(const char* str, char** endptr)
{
    return (int32_t)strtol(str, endptr, kBase16);
}

int32_t String_ToInt(const char* str, char** endptr)
{
    return (int32_t)strtol(str, endptr, kBase10);
}

uint32_t String_ToUint(const char* str, char** endptr)
{
    return (uint32_t)strtoul(str, endptr, kBase10);
}

// The 64-bit parsers used to cast through int, which dropped the upper
// 32 bits and sign-extended the rest.
uint64_t String_ToUlong(const char* str, char** endptr)
{
    return (uint64_t)strtoull(str, endptr, kBase10);
}

int64_t String_ToLong(const char* str, char** endptr)
{
    return (int64_t)strtoll(str, endptr, kBase10);
}

// Removes trailing '0' characters from a formatted float, in place.
// Strings without a '.' are left alone. The '.' itself is kept ("2.000000"
// becomes "2."), because stripping it breaks appending h to a number in MSL.
void String_StripTrailingFloatZeroes(char* buffer)
{
    if (strrchr(buffer, '.') == nullptr)
        return;

    // the '.' found above guarantees this stops before running off the front
    size_t bufferLen = strlen(buffer);
    while (bufferLen > 0 && buffer[bufferLen - 1] == '0') {
        buffer[--bufferLen] = 0;
    }
}
appending h to a number in MSL + // strip the period (only for MSL) + // char& c = buffer[bufferLen-1]; + // if (dotPos == &c) + // { + // c = 0; + // bufferLen--; + // } +} + +// Engine/Log.cpp + +void Log_Error(const char* format, ...) +{ + va_list args; + va_start(args, format); + Log_ErrorArgList(format, args); + va_end(args); +} + +void Log_ErrorArgList(const char* format, va_list args, const char* filename, uint32_t line) +{ + va_list tmp; + va_copy(tmp, args); + + // Not thread-safe + static std::string buffer; + buffer.clear(); + String_PrintfArgList(buffer, format, tmp); + + // TODO: this doesn't work on Win/Android + // use a real log abstraction to ODS/etc from Kram + if (filename) + fprintf(stderr, "%s:%d: error: %s", filename, line, buffer.c_str()); + else + fprintf(stderr, "error: %s", buffer.c_str()); + + va_end(tmp); +} + +// Engine/StringPool.cpp + +using StringPoolSet = std::unordered_set; + +#define CastImpl(imp) (StringPoolSet*)imp + +StringPool::StringPool(Allocator* allocator) +{ + // NOTE: allocator not used + + m_impl = new StringPoolSet(); +} +StringPool::~StringPool() +{ + auto* impl = CastImpl(m_impl); + + // delete the strings + for (auto it : *impl) { + const char* text = it; + free((char*)text); + } + + delete impl; +} + +const char* StringPool::AddString(const char* text) +{ + auto* impl = CastImpl(m_impl); + auto it = impl->find(text); + if (it != impl->end()) + return *it; + + // _strdup doesn't go through allocator either +#if _MSC_VER + const char* dup = _strdup(text); +#else + const char* dup = strdup(text); +#endif + + impl->insert(dup); + return dup; +} + +const char* StringPool::PrintFormattedVaList(const char* fmt, va_list args) +{ + char* res = nullptr; + + va_list tmp; + + // va_copy needed? 
+ va_copy(tmp, args); + + // just call 2x, once for len + int len = vsnprintf(nullptr, 0, fmt, tmp); + if (len >= 0) { + res = (char*)malloc(len + 1); + vsnprintf(res, len + 1, fmt, tmp); + } + va_end(tmp); + + // caller responsible for freeing mem + return res; +} + +const char* StringPool::AddStringFormatList(const char* format, va_list args) +{ + // don't format if no tokens + va_list tmp; + va_copy(tmp, args); + const char* text = PrintFormattedVaList(format, tmp); + va_end(tmp); + + auto* impl = CastImpl(m_impl); + + // add it if not found + auto it = impl->find(text); + if (it == impl->end()) { + impl->insert(text); + return text; + } + + // allocated inside PrintFormattedVaList + free((char*)text); + return *it; +} + +const char* StringPool::AddStringFormat(const char* format, ...) +{ + // TODO: don't format if no tokens + va_list args; + va_start(args, format); + const char* string = AddStringFormatList(format, args); + va_end(args); + + return string; +} + +bool StringPool::GetContainsString(const char* text) const +{ + const auto* impl = CastImpl(m_impl); + return impl->find(text) != impl->end(); +} + +} //namespace M4 diff --git a/hlslparser/src/Engine.h b/hlslparser/src/Engine.h new file mode 100644 index 00000000..43535cd9 --- /dev/null +++ b/hlslparser/src/Engine.h @@ -0,0 +1,272 @@ +#pragma once + +#if _MSC_VER +#define _CRT_SECURE_NO_WARNINGS 1 +#endif + +#include // va_list, vsnprintf +#include // malloc + +#include // for placement new + +// stl +#include + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef va_copy +#define va_copy(a, b) (a) = (b) +#endif + +// Engine/Assert.h +#include +//#define assert(...) 
#define ASSERT assert

// this is similar to printflike macro, checks format args
#if defined(__GNUC__) || defined(__clang__)
#define M4_PRINTF_ATTR(string_index, first_to_check) __attribute__((format(printf, string_index, first_to_check)))
#else
#define M4_PRINTF_ATTR(string_index, first_to_check)
#endif

namespace M4 {

// Engine/Allocator.h

// Thin malloc/realloc/free wrapper. It hands out raw, unconstructed storage:
// no constructors or destructors run here. Callers that need them use
// placement new / explicit destructor calls (see ConstructRange/DestroyRange).
class Allocator {
public:
    // Raw storage for one T.
    template <typename T>
    T* New()
    {
        return (T*)malloc(sizeof(T));
    }
    // Raw storage for count Ts.
    template <typename T>
    T* New(size_t count)
    {
        return (T*)malloc(sizeof(T) * count);
    }
    template <typename T>
    void Delete(T* ptr)
    {
        free((void*)ptr);
    }
    // Resizes storage to hold count Ts; contents are moved bytewise by realloc.
    template <typename T>
    T* Realloc(T* ptr, size_t count)
    {
        return (T*)realloc(ptr, sizeof(T) * count);
    }
};

// Engine/String.h

int String_FormatFloat(char* buffer, int size, float value);
bool String_Equal(const char* a, const char* b);
bool String_EqualNoCase(const char* a, const char* b);

double String_ToDouble(const char* str, char** end);
float String_ToFloat(const char* str, char** end);
// no half

int32_t String_ToIntHex(const char* str, char** end);
int32_t String_ToInt(const char* str, char** end);
uint32_t String_ToUint(const char* str, char** end);

uint64_t String_ToUlong(const char* str, char** end);
int64_t String_ToLong(const char* str, char** end);

bool String_HasChar(const char* str, char c);
bool String_HasString(const char* str, const char* search);

// just use these, it's way easier than using fixed buffers
int String_PrintfArgList(std::string& buffer, const char* format, va_list args);
int String_Printf(std::string& buffer, const char* format, ...) M4_PRINTF_ATTR(2, 3);

// These 3 calls have truncation issues
int String_Printf(char* buffer, int size, const char* format, ...) M4_PRINTF_ATTR(3, 4);
int String_PrintfArgList(char* buffer, int size, const char* format, va_list args);
void String_Copy(char* str, const char* b, uint32_t size);

void String_StripTrailingFloatZeroes(char* buffer);

// Hash and Compare are taken out of kram
// Case sensitive 32-bit FNV-style hash; pass an existing hash to continue it.
// NOTE: each step multiplies and then xors, which is the FNV-1 ordering even
// though the name says 1a. The ordering is left alone so hash values for
// ASCII input do not change.
inline uint32_t HashFnv1a(const char* val, uint32_t hash = 0x811c9dc5)
{
    const uint32_t prime = 0x01000193; // 16777619 (32-bit)
    while (*val) {
        // go through unsigned char so bytes >= 0x80 are not sign-extended
        // where plain char is signed; the hash is then platform independent
        hash = (hash * prime) ^ (uint32_t)(unsigned char)*val++;
    }
    return hash;
}

// Hash + equality functor for strings stored as const char*.
// The two-argument call compares content, the one-argument call hashes it.
struct CompareAndHandStrings {
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return String_Equal(lhs, rhs);
    }

    template <typename T>
    size_t operator()(const T& key) const
    {
        // assumes 32-bit hash to int64 conversion here
        return (size_t)HashFnv1a(key);
    }
};

// Engine/Log.h

void Log_Error(const char* format, ...) M4_PRINTF_ATTR(1, 2);

void Log_ErrorArgList(const char* format, va_list args, const char* filename = NULL, uint32_t line = 0);

// Engine/Array.h

// Default-constructs buffer[old_size .. new_size) in place.
template <typename T>
void ConstructRange(T* buffer, int new_size, int old_size)
{
    for (int i = old_size; i < new_size; i++) {
        new (buffer + i) T; // placement new
    }
}

// Copy-constructs buffer[old_size .. new_size) from val in place.
template <typename T>
void ConstructRange(T* buffer, int new_size, int old_size, const T& val)
{
    for (int i = old_size; i < new_size; i++) {
        new (buffer + i) T(val); // placement new
    }
}

// Destroys buffer[new_size .. old_size) in place.
template <typename T>
void DestroyRange(T* buffer, int new_size, int old_size)
{
    for (int i = new_size; i < old_size; i++) {
        (buffer + i)->~T(); // Explicit call to the destructor
    }
}

// Growable array on top of Allocator. Storage is grown with Realloc, so
// elements are relocated bytewise when the buffer moves.
template <typename T>
class Array {
public:
    Array(Allocator* allocator) : allocator(allocator), buffer(NULL), size(0), capacity(0) {}

    // Appends a copy of val. val must not refer to an element of this array,
    // since growing may move the storage before the copy is made.
    void PushBack(const T& val)
    {
        ASSERT(&val < buffer || &val >= buffer + size);

        int old_size = size;
        int new_size = size + 1;

        SetSize(new_size);

        ConstructRange(buffer, new_size, old_size, val);
    }
    // Appends a default-constructed element and returns a reference to it.
    T& PushBackNew()
    {
        int old_size = size;
        int new_size = size + 1;

        SetSize(new_size);

        ConstructRange(buffer, new_size, old_size);

        return buffer[old_size];
    }
    // Shrinks (destroying the tail) or grows (default-constructing new elements).
    void Resize(int new_size)
    {
        int old_size = size;

        DestroyRange(buffer, new_size, old_size);

        SetSize(new_size);

        ConstructRange(buffer, new_size, old_size);
    }

    int GetSize() const { return size; }
    const T& operator[](int i) const
    {
        // the index is signed, so check both ends
        ASSERT(i >= 0 && i < size);
        return buffer[i];
    }
    T& operator[](int i)
    {
        ASSERT(i >= 0 && i < size);
        return buffer[i];
    }

private:
    // Change array size.
    void SetSize(int new_size)
    {
        size = new_size;

        if (new_size > capacity) {
            int new_buffer_size;
            if (capacity == 0) {
                // first allocation is exact
                new_buffer_size = new_size;
            }
            else {
                // following allocations grow array by 25%
                new_buffer_size = new_size + (new_size >> 2);
            }

            SetCapacity(new_buffer_size);
        }
    }

    // Change array capacity.
    void SetCapacity(int new_capacity)
    {
        ASSERT(new_capacity >= size);

        if (new_capacity == 0) {
            // free the buffer.
            if (buffer != NULL) {
                allocator->Delete(buffer);
                buffer = NULL;
            }
        }
        else {
            // realloc the buffer
            buffer = allocator->Realloc(buffer, new_capacity);
        }

        capacity = new_capacity;
    }

private:
    Allocator* allocator; // @@ Do we really have to keep a pointer to this?
    T* buffer;
    int size;
    int capacity;
};

// Engine/StringPool.h

// Pool of unique C strings; the storage is hidden behind m_impl.
// @@ Implement this with a hash table!
struct StringPool {
    StringPool(Allocator* allocator);
    ~StringPool();

    const char* AddString(const char* text);
    const char* AddStringFormat(const char* fmt, ...) M4_PRINTF_ATTR(2, 3);
    const char* AddStringFormatList(const char* fmt, va_list args);
    bool GetContainsString(const char* text) const;

private:
    const char* PrintFormattedVaList(const char* fmt, va_list args);
    void* m_impl = NULL;
};

} //namespace M4
+ //if (IsDouble(baseType)) + // baseType = DoubleToFloatBaseType(baseType); + + HLSLType remappedType(type); + remappedType.baseType = baseType; + + // DONE: these can all just use a table entry, have another slot for MSL + // Functions can return void, especially with compute + if (IsTextureType(baseType) || IsSamplerType(baseType) || IsNumericType(baseType) || baseType == HLSLBaseType_Void || baseType == HLSLBaseType_UserDefined) + return GetTypeNameHLSL(remappedType); + + Error("Unknown type"); + return NULL; +} + +// TODO: copied from MSLGenerator +// @@ We could be a lot smarter removing parenthesis based on the operator precedence of the parent expression. +static bool NeedsParenthesis(HLSLExpression* expression, HLSLExpression* parentExpression) +{ + // For now we just omit the parenthesis if there's no parent expression. + if (parentExpression == NULL) { + return false; + } + + // One more special case that's pretty common. + if (parentExpression->nodeType == HLSLNodeType_MemberAccess) { + if (expression->nodeType == HLSLNodeType_IdentifierExpression || + expression->nodeType == HLSLNodeType_ArrayAccess || + expression->nodeType == HLSLNodeType_MemberAccess) { + return false; + } + } + + return true; +} + +/* unused +static int GetFunctionArguments(HLSLFunctionCall* functionCall, HLSLExpression* expression[], int maxArguments) +{ + HLSLExpression* argument = functionCall->argument; + int numArguments = 0; + while (argument != NULL) + { + if (numArguments < maxArguments) + { + expression[numArguments] = argument; + } + argument = argument->nextExpression; + ++numArguments; + } + return numArguments; +} +*/ + +HLSLGenerator::HLSLGenerator() +{ + m_tree = NULL; + m_entryName = NULL; + m_target = HLSLTarget_VertexShader; + m_isInsideBuffer = false; + m_error = false; +} + +// @@ We need a better way of doing semantic replacement: +// - Look at the function being generated. 
+// - Return semantic, semantics associated to fields of the return structure, or output arguments, or fields of structures associated to output arguments -> output semantic replacement. +// - Semantics associated input arguments or fields of the input arguments -> input semantic replacement. +static const char* TranslateSemantic(const char* semantic, bool output, HLSLTarget target) +{ + // Note: these are all just passthrough of the DX10 semantics + // except for BASEVERTEX/INSTANCE which doesn't seem to dxc compile. + + if (target == HLSLTarget_VertexShader) { + if (output) { + } + else { + // see here for sample of builtin notation + // https://github.com/microsoft/DirectXShaderCompiler/commit/b6fe9886ad + + // Vulkan/MSL only, requires ext DrawParameters + // [[vk::builtin(\"BaseVertex\")]] uint baseVertex : + // [[vk::builtin(\"BaseInstance\")]] uint instance : SV_BaseInstance + + if (String_Equal(semantic, "BASEVERTEX")) + return "BaseVertex"; // vulkan only + if (String_Equal(semantic, "BASEINSTANCE")) + return "BaseInstance"; // vulkan only + } + } + else if (target == HLSLTarget_PixelShader) { + if (output) { + } + else { + } + } + else if (target == HLSLTarget_ComputeShader) { + if (output) { + } + else { + } + } + return NULL; +} + +void HLSLGenerator::Error(const char* format, ...) +{ + // It's not always convenient to stop executing when an error occurs, + // so just track once we've hit an error and stop reporting them until + // we successfully bail out of execution. 
+ if (m_error) { + return; + } + m_error = true; + + va_list arg; + va_start(arg, format); + Log_ErrorArgList(format, arg); + va_end(arg); +} + +bool HLSLGenerator::Generate(HLSLTree* tree, HLSLTarget target, const char* entryName, const HLSLOptions& options) +{ + m_tree = tree; + m_entryName = entryName; + m_target = target; + m_isInsideBuffer = false; + + m_options = options; + m_writer.SetWriteFileLine(options.writeFileLine); + + m_writer.Reset(); + + // Find entry point function + HLSLFunction* entryFunction = tree->FindFunction(entryName); + if (entryFunction == NULL) { + Error("Entry point '%s' doesn't exist\n", entryName); + return false; + } + + // PruneTree resets hidden flags to true, then marks visible elements + // based on whether entry point visits them. + PruneTree(tree, entryFunction->name); // Note: takes second entry + + // This sorts tree by type, but keeps ordering + SortTree(tree); + + // This strips any unused inputs to the entry point function + HideUnusedArguments(entryFunction); + + // Is this needed + FlattenExpressions(tree); + + m_writer.WriteLine(0, "#include \"ShaderHLSL.h\""); + + // @@ Should we generate an entirely new copy of the tree so that we can modify it in place? + //if (!legacy) + { + HLSLFunction* function = tree->FindFunction(entryName); + + // Handle return value semantics + if (function->semantic != NULL) { + function->sv_semantic = TranslateSemantic(function->semantic, /*output=*/true, target); + } + if (function->returnType.baseType == HLSLBaseType_UserDefined) { + HLSLStruct* s = tree->FindGlobalStruct(function->returnType.typeName); + + HLSLStructField* sv_fields = NULL; + + HLSLStructField* lastField = NULL; + HLSLStructField* field = s->field; + while (field) { + HLSLStructField* nextField = field->nextField; + + // TODO: may have to be careful with SV_Position, since this puts + // those last. SSBO won't use those semantics, so should be okay. 
+ + if (field->semantic) { + field->hidden = false; + field->sv_semantic = TranslateSemantic(field->semantic, /*output=*/true, target); + + // Fields with SV semantics are stored at the end to avoid linkage problems. + if (field->sv_semantic != NULL) { + // Unlink from last. + if (lastField != NULL) + lastField->nextField = nextField; + else + s->field = nextField; + + // Add to sv_fields. + field->nextField = sv_fields; + sv_fields = field; + } + } + + if (field != sv_fields) lastField = field; + field = nextField; + } + + // Append SV fields at the end. + if (sv_fields != NULL) { + if (lastField == NULL) { + s->field = sv_fields; + } + else { + ASSERT(lastField->nextField == NULL); + lastField->nextField = sv_fields; + } + } + } + + // Handle argument semantics. + // @@ It would be nice to flag arguments that are used by the program and skip or hide the unused ones. + HLSLArgument* argument = function->argument; + while (argument) { + bool output = argument->modifier == HLSLArgumentModifier_Out; + if (argument->semantic) { + argument->sv_semantic = TranslateSemantic(argument->semantic, output, target); + } + + if (argument->type.baseType == HLSLBaseType_UserDefined) { + HLSLStruct* s = tree->FindGlobalStruct(argument->type.typeName); + + HLSLStructField* field = s->field; + while (field) { + if (field->semantic) { + field->hidden = false; + field->sv_semantic = TranslateSemantic(field->semantic, output, target); + } + + field = field->nextField; + } + } + + argument = argument->nextArgument; + } + } + + HLSLRoot* root = m_tree->GetRoot(); + OutputStatements(0, root->statement); + + m_tree = NULL; + return true; +} + +const char* HLSLGenerator::GetResult() const +{ + return m_writer.GetResult(); +} + +void HLSLGenerator::OutputExpressionList(HLSLExpression* expression) +{ + int numExpressions = 0; + while (expression != NULL) { + if (numExpressions > 0) { + m_writer.Write(", "); + } + OutputExpression(expression); + expression = expression->nextExpression; + 
++numExpressions; + } +} + +void HLSLGenerator::OutputExpression(HLSLExpression* expression) +{ + if (expression->nodeType == HLSLNodeType_IdentifierExpression) { + HLSLIdentifierExpression* identifierExpression = static_cast(expression); + const char* name = identifierExpression->name; + + m_writer.Write("%s", name); + } + else if (expression->nodeType == HLSLNodeType_CastingExpression) { + HLSLCastingExpression* castingExpression = static_cast(expression); + m_writer.Write("("); + // OutputDeclaration(castingExpression->type, ""); // old - adds space after type + OutputDeclarationType(castingExpression->type, true /*isTypeCast*/); // new + m_writer.Write(")"); + + // These parens may not be needed + m_writer.Write("("); + OutputExpression(castingExpression->expression); + m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_ConstructorExpression) { + HLSLConstructorExpression* constructorExpression = static_cast(expression); + m_writer.Write("%s(", GetTypeName(constructorExpression->type)); + OutputExpressionList(constructorExpression->argument); + m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_LiteralExpression) { + HLSLLiteralExpression* literalExpression = static_cast(expression); + + HLSLBaseType type = literalExpression->type; + if (m_options.treatHalfAsFloat && IsHalf(type)) + type = HLSLBaseType_Float; + + switch (type) { + case HLSLBaseType_Half: + case HLSLBaseType_Float: + case HLSLBaseType_Double: { + // Don't use printf directly so that we don't use the system locale. + char buffer[64]; + String_FormatFloat(buffer, sizeof(buffer), literalExpression->fValue); + String_StripTrailingFloatZeroes(buffer); + m_writer.Write("%s%s", buffer, type == HLSLBaseType_Half ? 
"h" : ""); + } break; + + case HLSLBaseType_Short: + case HLSLBaseType_Ulong: + case HLSLBaseType_Int: + m_writer.Write("%d", literalExpression->iValue); + break; + // TODO: missing uint, u/short, u/long double + + case HLSLBaseType_Bool: + m_writer.Write("%s", literalExpression->bValue ? "true" : "false"); + break; + default: + Error("Unhandled literal"); + //ASSERT(false); + } + } + else if (expression->nodeType == HLSLNodeType_UnaryExpression) { + HLSLUnaryExpression* unaryExpression = static_cast(expression); + const char* op = "?"; + bool pre = true; + switch (unaryExpression->unaryOp) { + case HLSLUnaryOp_Negative: + op = "-"; + break; + case HLSLUnaryOp_Positive: + op = "+"; + break; + case HLSLUnaryOp_Not: + op = "!"; + break; + case HLSLUnaryOp_PreIncrement: + op = "++"; + break; + case HLSLUnaryOp_PreDecrement: + op = "--"; + break; + case HLSLUnaryOp_PostIncrement: + op = "++"; + pre = false; + break; + case HLSLUnaryOp_PostDecrement: + op = "--"; + pre = false; + break; + case HLSLUnaryOp_BitNot: + op = "~"; + break; + } + + // eliminate () if pure characters + bool addParenthesis = NeedsParenthesis(unaryExpression->expression, expression); + if (addParenthesis) m_writer.Write("("); + + if (pre) { + m_writer.Write("%s", op); + OutputExpression(unaryExpression->expression); + } + else { + OutputExpression(unaryExpression->expression); + m_writer.Write("%s", op); + } + if (addParenthesis) m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_BinaryExpression) { + HLSLBinaryExpression* binaryExpression = static_cast(expression); + + // TODO: to fix this need to pass in parentExpression to + // the call. And MSLGenerator passes NULL for most of these. 
+ // TODO: eliminate () if pure characters + + bool addParenthesis = false; // NeedsParenthesis(expression, parentExpression); + if (addParenthesis) m_writer.Write("("); + + OutputExpression(binaryExpression->expression1); + const char* op = "?"; + switch (binaryExpression->binaryOp) { + case HLSLBinaryOp_Add: + op = " + "; + break; + case HLSLBinaryOp_Sub: + op = " - "; + break; + case HLSLBinaryOp_Mul: + op = " * "; + break; + case HLSLBinaryOp_Div: + op = " / "; + break; + case HLSLBinaryOp_Less: + op = " < "; + break; + case HLSLBinaryOp_Greater: + op = " > "; + break; + case HLSLBinaryOp_LessEqual: + op = " <= "; + break; + case HLSLBinaryOp_GreaterEqual: + op = " >= "; + break; + case HLSLBinaryOp_Equal: + op = " == "; + break; + case HLSLBinaryOp_NotEqual: + op = " != "; + break; + case HLSLBinaryOp_Assign: + op = " = "; + break; + case HLSLBinaryOp_AddAssign: + op = " += "; + break; + case HLSLBinaryOp_SubAssign: + op = " -= "; + break; + case HLSLBinaryOp_MulAssign: + op = " *= "; + break; + case HLSLBinaryOp_DivAssign: + op = " /= "; + break; + case HLSLBinaryOp_And: + op = " && "; + break; + case HLSLBinaryOp_Or: + op = " || "; + break; + case HLSLBinaryOp_BitAnd: + op = " & "; + break; + case HLSLBinaryOp_BitOr: + op = " | "; + break; + case HLSLBinaryOp_BitXor: + op = " ^ "; + break; + default: + Error("Unhandled binary op"); + //ASSERT(false); + } + m_writer.Write("%s", op); + OutputExpression(binaryExpression->expression2); + if (addParenthesis) m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_ConditionalExpression) { + HLSLConditionalExpression* conditionalExpression = static_cast(expression); + + // TODO: eliminate () if pure characters + m_writer.Write("(("); + OutputExpression(conditionalExpression->condition); + m_writer.Write(")?("); + OutputExpression(conditionalExpression->trueExpression); + m_writer.Write("):("); + OutputExpression(conditionalExpression->falseExpression); + m_writer.Write("))"); + } + else if 
(expression->nodeType == HLSLNodeType_MemberAccess) { + HLSLMemberAccess* memberAccess = static_cast(expression); + + bool addParenthesis = NeedsParenthesis(memberAccess->object, expression); + + // eliminate () if pure characters + if (addParenthesis) m_writer.Write("("); + OutputExpression(memberAccess->object); + if (addParenthesis) m_writer.Write(")"); + m_writer.Write(".%s", memberAccess->field); + } + else if (expression->nodeType == HLSLNodeType_ArrayAccess) { + HLSLArrayAccess* arrayAccess = static_cast(expression); + OutputExpression(arrayAccess->array); + m_writer.Write("["); + OutputExpression(arrayAccess->index); + m_writer.Write("]"); + } + else if (expression->nodeType == HLSLNodeType_FunctionCall) { + HLSLFunctionCall* functionCall = static_cast(expression); + const char* name = functionCall->function->name; + m_writer.Write("%s(", name); + OutputExpressionList(functionCall->argument); + m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_MemberFunctionCall) { + HLSLMemberFunctionCall* functionCall = static_cast(expression); + + // Spriv only supports fp32 or i32/i64 OpTypeImage + if (IsHalf(functionCall->function->returnType.baseType) && m_options.writeVulkan) { + // TODO: may need parens + m_writer.Write("(half4)"); + } + + // Write out the member identifier + m_writer.Write("%s.", functionCall->memberIdentifier->name); + + // Same as FunctionCall + const char* name = functionCall->function->name; + m_writer.Write("%s(", name); + OutputExpressionList(functionCall->argument); + m_writer.Write(")"); + } + else { + Error("unknown expression"); + } +} + +void HLSLGenerator::OutputArguments(HLSLArgument* argument) +{ + int numArgs = 0; + while (argument != NULL) { + if (numArgs > 0) { + int indent = m_writer.EndLine(","); + m_writer.BeginLine(indent); + } + + const char* semantic = argument->sv_semantic ? 
argument->sv_semantic : argument->semantic; + + // Have to inject vulkan + if (semantic && m_options.writeVulkan) { + if (String_Equal(semantic, "PSIZE")) + m_writer.Write("%s ", "[[vk::builtin(\"PointSize\")]]"); + else if (String_Equal(semantic, "BaseVertex")) + m_writer.Write("%s ", "[[vk::builtin(\"BaseVertex\")]]"); + else if (String_Equal(semantic, "BaseInstance")) + m_writer.Write("%s ", "[[vk::builtin(\"BaseInstance\")]]"); + } + + // Then modifier + switch (argument->modifier) { + case HLSLArgumentModifier_In: + m_writer.Write("in "); + break; + case HLSLArgumentModifier_Out: + m_writer.Write("out "); + break; + case HLSLArgumentModifier_Inout: + m_writer.Write("inout "); + break; + case HLSLArgumentModifier_Uniform: + m_writer.Write("uniform "); + break; + default: + break; + } + + OutputDeclaration(argument->type, argument->name, semantic, /*registerName=*/NULL, argument->defaultValue); + + argument = argument->nextArgument; + ++numArgs; + } +} + +static const char* GetAttributeName(HLSLAttributeType attributeType) +{ + if (attributeType == HLSLAttributeType_Unroll) return "unroll"; + if (attributeType == HLSLAttributeType_Branch) return "branch"; + if (attributeType == HLSLAttributeType_Flatten) return "flatten"; + return NULL; +} + +void HLSLGenerator::OutputAttributes(int indent, HLSLAttribute* attribute) +{ + while (attribute != NULL) { + const char* attributeName = GetAttributeName(attribute->attributeType); + + if (attributeName != NULL) { + m_writer.WriteLineTagged(indent, attribute->fileName, attribute->line, "[%s]", attributeName); + } + + attribute = attribute->nextAttribute; + } +} + +static const char* BufferTypeToName(HLSLBufferType bufferType) +{ + const char* name = ""; + switch (bufferType) { + case HLSLBufferType_CBuffer: + name = "cbuffer"; + break; + case HLSLBufferType_TBuffer: + name = "tbuffer"; + break; + + case HLSLBufferType_ConstantBuffer: + name = "ConstantBuffer"; + break; + case HLSLBufferType_StructuredBuffer: + name = 
"StructuredBuffer"; + break; + case HLSLBufferType_RWStructuredBuffer: + name = "RWStructuredBuffer"; + break; + case HLSLBufferType_ByteAddressBuffer: + name = "ByteAddressBuffer"; + break; + case HLSLBufferType_RWByteAddressBuffer: + name = "RWByteAddresssBuffer"; + break; + } + + return name; +} + +bool HLSLGenerator::CanSkipWrittenStatement(const HLSLStatement* statement) const +{ + if (!statement->written) return false; + + // only write these once for multi-entrypoint + if (statement->nodeType == HLSLNodeType_Comment || + statement->nodeType == HLSLNodeType_Buffer || + statement->nodeType == HLSLNodeType_Struct) + return true; + + // only write const scalars out once, so they don't conflict + if (statement->nodeType == HLSLNodeType_Declaration) { + const HLSLDeclaration* decl = (const HLSLDeclaration*)statement; + if (IsScalarType(decl->type.baseType) && decl->type.flags & HLSLTypeFlag_Const) { + return true; + } + } + + // Helper functions should be skipped once written out + if (statement->nodeType == HLSLNodeType_Function) { + return true; + } + + return false; +} +void HLSLGenerator::OutputStatements(int indent, HLSLStatement* statement) +{ + while (statement != NULL) { + // skip pruned statements + if (statement->hidden) { + statement = statement->nextStatement; + continue; + } + + // skip writing some types across multiple entry points + if (CanSkipWrittenStatement(statement)) { + statement = statement->nextStatement; + continue; + } + statement->written = true; + + OutputAttributes(indent, statement->attributes); + + if (statement->nodeType == HLSLNodeType_Comment) { + HLSLComment* comment = static_cast(statement); + m_writer.WriteLine(indent, "//%s", comment->text); + } + else if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration* declaration = static_cast(statement); + m_writer.BeginLine(indent, declaration->fileName, declaration->line); + OutputDeclaration(declaration); + m_writer.EndLine(";"); + } + else if (statement->nodeType 
== HLSLNodeType_Struct) { + HLSLStruct* structure = static_cast(statement); + m_writer.WriteLineTagged(indent, structure->fileName, structure->line, "struct %s {", structure->name); + HLSLStructField* field = structure->field; + while (field != NULL) { + if (!field->hidden) { + m_writer.BeginLine(indent + 1, field->fileName, field->line); + const char* semantic = field->sv_semantic ? field->sv_semantic : field->semantic; + OutputDeclaration(field->type, field->name, semantic); + m_writer.Write(";"); + m_writer.EndLine(); + } + field = field->nextField; + } + m_writer.WriteLine(indent, "};"); + } + else if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer* buffer = static_cast(statement); + HLSLDeclaration* field = buffer->field; + + if (!buffer->IsGlobalFields()) { + // Constant/Structured/ByteAdddressBuffer + m_writer.BeginLine(indent, buffer->fileName, buffer->line); + + // Handle push constant for Vulkan. + // This is just a buffer to MSL. + // VK is limited to 1 buffer as a result. Cannot contain half on AMD. 
+ if (buffer->bufferType == HLSLBufferType_ConstantBuffer) { + if (m_options.writeVulkan && + (String_HasString(buffer->name, "Push") || + String_HasString(buffer->name, "push"))) { + m_writer.Write("[[vk::push_constant]] "); + } + } + + // write out template + m_writer.Write("%s<%s> %s", + BufferTypeToName(buffer->bufferType), + buffer->bufferStruct->name, + buffer->name); + + // write out optinal register + if (buffer->registerName != NULL) { + m_writer.Write(" : register(%s)", buffer->registerName); + } + + m_writer.Write(";"); + m_writer.EndLine(); + } + else { + // c/tbuffer + m_writer.BeginLine(indent, buffer->fileName, buffer->line); + + // not templated + m_writer.Write("%s %s", + BufferTypeToName(buffer->bufferType), + buffer->name); + + // write out optional register + if (buffer->registerName != NULL) { + m_writer.Write(" : register(%s)", buffer->registerName); + } + + m_writer.EndLine(" {"); + m_isInsideBuffer = true; + + while (field != NULL) { + if (!field->hidden) { + m_writer.BeginLine(indent + 1, field->fileName, field->line); + OutputDeclaration(field->type, field->name, /*semantic=*/NULL, /*registerName*/ field->registerName, field->assignment); + m_writer.Write(";"); + m_writer.EndLine(); + } + field = (HLSLDeclaration*)field->nextStatement; + } + + m_isInsideBuffer = false; + + m_writer.WriteLine(indent, "};"); + } + } + else if (statement->nodeType == HLSLNodeType_Function) { + HLSLFunction* function = static_cast(statement); + + // Use an alternate name for the function which is supposed to be entry point + // so that we can supply our own function which will be the actual entry point. 
+ const char* functionName = function->name; + const char* returnTypeName = GetTypeName(function->returnType); + + bool isEntryPoint = String_Equal(functionName, m_entryName); + if (isEntryPoint) { + // This is a SM6.x construct for tagging entry points + switch (m_target) { + case HLSLTarget_VertexShader: + m_writer.WriteLine(indent, "[shader(\"vertex\")] "); + break; + case HLSLTarget_PixelShader: + m_writer.WriteLine(indent, "[shader(\"pixel\")] "); + break; + case HLSLTarget_ComputeShader: + m_writer.WriteLine(indent, "[shader(\"compute\")] "); + // TODO: hack, since don't actually parse bracket construct yet + m_writer.WriteLine(indent, "[numthreads(1,1,1)]"); + break; + } + } + + m_writer.BeginLine(indent, function->fileName, function->line); + m_writer.Write("%s %s(", returnTypeName, functionName); + + OutputArguments(function->argument); + + const char* semantic = function->sv_semantic ? function->sv_semantic : function->semantic; + if (semantic != NULL) { + m_writer.Write(") : %s {", semantic); + } + else { + m_writer.Write(") {"); + } + + m_writer.EndLine(); + + OutputStatements(indent + 1, function->statement); + m_writer.WriteLine(indent, "};"); + } + else if (statement->nodeType == HLSLNodeType_ExpressionStatement) { + HLSLExpressionStatement* expressionStatement = static_cast(statement); + m_writer.BeginLine(indent, statement->fileName, statement->line); + OutputExpression(expressionStatement->expression); + m_writer.EndLine(";"); + } + else if (statement->nodeType == HLSLNodeType_ReturnStatement) { + HLSLReturnStatement* returnStatement = static_cast(statement); + if (returnStatement->expression != NULL) { + m_writer.BeginLine(indent, returnStatement->fileName, returnStatement->line); + m_writer.Write("return "); + OutputExpression(returnStatement->expression); + m_writer.EndLine(";"); + } + else { + m_writer.WriteLineTagged(indent, returnStatement->fileName, returnStatement->line, "return;"); + } + } + else if (statement->nodeType == 
HLSLNodeType_DiscardStatement) { + HLSLDiscardStatement* discardStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, discardStatement->fileName, discardStatement->line, "discard;"); + } + else if (statement->nodeType == HLSLNodeType_BreakStatement) { + HLSLBreakStatement* breakStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, breakStatement->fileName, breakStatement->line, "break;"); + } + else if (statement->nodeType == HLSLNodeType_ContinueStatement) { + HLSLContinueStatement* continueStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, continueStatement->fileName, continueStatement->line, "continue;"); + } + else if (statement->nodeType == HLSLNodeType_IfStatement) { + HLSLIfStatement* ifStatement = static_cast(statement); + m_writer.BeginLine(indent, ifStatement->fileName, ifStatement->line); + m_writer.Write("if ("); + OutputExpression(ifStatement->condition); + m_writer.Write(") {"); + m_writer.EndLine(); + OutputStatements(indent + 1, ifStatement->statement); + m_writer.WriteLine(indent, "}"); + if (ifStatement->elseStatement != NULL) { + m_writer.WriteLine(indent, "else {"); + OutputStatements(indent + 1, ifStatement->elseStatement); + m_writer.WriteLine(indent, "}"); + } + } + else if (statement->nodeType == HLSLNodeType_ForStatement) { + HLSLForStatement* forStatement = static_cast(statement); + m_writer.BeginLine(indent, forStatement->fileName, forStatement->line); + m_writer.Write("for ("); + OutputDeclaration(forStatement->initialization); + m_writer.Write("; "); + OutputExpression(forStatement->condition); + m_writer.Write("; "); + OutputExpression(forStatement->increment); + m_writer.Write(") {"); + m_writer.EndLine(); + OutputStatements(indent + 1, forStatement->statement); + m_writer.WriteLine(indent, "}"); + } + else if (statement->nodeType == HLSLNodeType_BlockStatement) { + HLSLBlockStatement* blockStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, 
blockStatement->fileName, blockStatement->line, "{"); + OutputStatements(indent + 1, blockStatement->statement); + m_writer.WriteLine(indent, "}"); + } + // FX file constructs + // else if (statement->nodeType == HLSLNodeType_Technique) + // { + // // Techniques are ignored. + // } + // else if (statement->nodeType == HLSLNodeType_Pipeline) + // { + // // Pipelines are ignored. + // } + else { + // Unhanded statement type. + Error("Unhandled statement"); + //ASSERT(false); + } + + statement = statement->nextStatement; + } +} + +// Use for templated buffers/textures +const char* HLSLGenerator::GetFormatName(HLSLBaseType bufferOrTextureType, HLSLBaseType formatType) +{ + // TODO: have a way to disable use of half (like on MSLGenerator) + bool isHalf = IsHalf(formatType); + + // Can't use half4 textures with spirv. Can only cast from full float sampler. + // Can tell Vulkan was written by/for desktop IHVs. + // https://github.com/microsoft/DirectXShaderCompiler/issues/2711 + bool isSpirvTarget = m_options.writeVulkan; + if (isSpirvTarget) + isHalf = false; + + const char* formatName = isHalf ? "half4" : "float4"; + + // MSL only uses half/float mostly. With HLSL, this is a full + // template format of float/2/3/4. 
+ + return formatName; +} + +void HLSLGenerator::OutputDeclaration(HLSLDeclaration* declaration) +{ + if (IsSamplerType(declaration->type)) { + int reg = -1; + if (declaration->registerName != NULL) { + sscanf(declaration->registerName, "s%d", ®); + } + + // sampler + const char* samplerTypeName = GetTypeName(declaration->type); + if (samplerTypeName) { + if (reg != -1) { + m_writer.Write("%s %s : register(s%d)", samplerTypeName, declaration->name, reg); + } + else { + m_writer.Write("%s %s", samplerTypeName, declaration->name); + } + } + return; + } + if (IsTextureType(declaration->type)) { + int reg = -1; + if (declaration->registerName != NULL) { + sscanf(declaration->registerName, "t%d", ®); + } + + HLSLBaseType formatType = declaration->type.formatType; + if (m_options.treatHalfAsFloat && IsHalf(formatType)) + formatType = HalfToFloatBaseType(formatType); + + const char* formatTypeName = GetFormatName(declaration->type.baseType, formatType); + + // texture carts the dimension and format + const char* textureTypeName = GetTypeName(declaration->type); + + if (textureTypeName != NULL) { + if (reg != -1) { + m_writer.Write("%s<%s> %s : register(t%d)", textureTypeName, formatTypeName, declaration->name, reg); + } + else { + m_writer.Write("%s<%s> %s", textureTypeName, formatTypeName, declaration->name); + } + } + return; + } + + OutputDeclarationType(declaration->type); + OutputDeclarationBody(declaration->type, declaration->name, declaration->semantic, declaration->registerName, declaration->assignment); + declaration = declaration->nextDeclaration; + + while (declaration != NULL) { + m_writer.Write(", "); + OutputDeclarationBody(declaration->type, declaration->name, declaration->semantic, declaration->registerName, declaration->assignment); + declaration = declaration->nextDeclaration; + }; +} + +void HLSLGenerator::OutputDeclarationType(const HLSLType& type, bool isTypeCast) +{ + const char* typeName = GetTypeName(type); + + if (isTypeCast) { + 
m_writer.Write("%s", typeName); + return; + } + + if (type.flags & HLSLTypeFlag_Static) { + m_writer.Write("static "); + } + if (type.flags & HLSLTypeFlag_Const) { + m_writer.Write("const "); + } + + // Interpolation modifiers. + if (type.flags & HLSLTypeFlag_Centroid) { + m_writer.Write("centroid "); + } + if (type.flags & HLSLTypeFlag_Linear) { + m_writer.Write("linear "); + } + if (type.flags & HLSLTypeFlag_NoInterpolation) { + m_writer.Write("nointerpolation "); + } + if (type.flags & HLSLTypeFlag_NoPerspective) { + m_writer.Write("noperspective "); + } + if (type.flags & HLSLTypeFlag_Sample) // @@ Only in shader model >= 4.1 + { + m_writer.Write("sample "); + } + + m_writer.Write("%s ", typeName); +} + +void HLSLGenerator::OutputDeclarationBody(const HLSLType& type, const char* name, const char* semantic /*=NULL*/, const char* registerName /*=NULL*/, HLSLExpression* assignment /*=NULL*/) +{ + m_writer.Write("%s", name); + + if (type.array) { + ASSERT(semantic == NULL); + m_writer.Write("["); + if (type.arraySize != NULL) { + OutputExpression(type.arraySize); + } + m_writer.Write("]"); + } + + if (semantic != NULL) { + m_writer.Write(" : %s", semantic); + } + + if (registerName != NULL) { + if (m_isInsideBuffer) { + m_writer.Write(" : packoffset(%s)", registerName); + } + else { + m_writer.Write(" : register(%s)", registerName); + } + } + + if (assignment != NULL && !IsSamplerType(type)) { + m_writer.Write(" = "); + if (type.array) { + m_writer.Write("{ "); + OutputExpressionList(assignment); + m_writer.Write(" }"); + } + else { + OutputExpression(assignment); + } + } +} + +void HLSLGenerator::OutputDeclaration(const HLSLType& type, const char* name, const char* semantic /*=NULL*/, const char* registerName /*=NULL*/, HLSLExpression* assignment /*=NULL*/) +{ + OutputDeclarationType(type); + OutputDeclarationBody(type, name, semantic, registerName, assignment); +} + +bool HLSLGenerator::ChooseUniqueName(const char* base, char* dst, int dstLength) const +{ + // 
// Options controlling HLSLGenerator output. Every flag is off by default.
// TODO: try to unify some options with MSLGenerator
struct HLSLOptions {
    // int (*attributeCallback)(const char* name, uint32_t index) = NULL;
    // uint32_t bufferRegisterOffset = 0;

    // Forwarded to the code writer through SetWriteFileLine().
    bool writeFileLine{false};

    // Write half literals and half texture formats as float.
    bool treatHalfAsFloat{false};
    // TODO: hook this up
    // bool treatDoubleAsFloat = true;

    // Add vk constructs to the HLSL source so it can be converted to SPIR-V.
    bool writeVulkan{false};
};
+ */ +class HLSLGenerator { +public: + HLSLGenerator(); + + bool Generate(HLSLTree* tree, HLSLTarget target, const char* entryName, const HLSLOptions& options = HLSLOptions()); + const char* GetResult() const; + +private: + void OutputExpressionList(HLSLExpression* expression); + void OutputExpression(HLSLExpression* expression); + void OutputArguments(HLSLArgument* argument); + void OutputAttributes(int indent, HLSLAttribute* attribute); + void OutputStatements(int indent, HLSLStatement* statement); + void OutputDeclaration(HLSLDeclaration* declaration); + void OutputDeclaration(const HLSLType& type, const char* name, const char* semantic = NULL, const char* registerName = NULL, HLSLExpression* defaultValue = NULL); + void OutputDeclarationType(const HLSLType& type, bool isTypeCast = false); + void OutputDeclarationBody(const HLSLType& type, const char* name, const char* semantic = NULL, const char* registerName = NULL, HLSLExpression* assignment = NULL); + + /** Generates a name of the format "base+n" where n is an integer such that the name + * isn't used in the syntax tree. */ + bool ChooseUniqueName(const char* base, char* dst, int dstLength) const; + + const char* GetTypeName(const HLSLType& type); + + void Error(const char* format, ...) 
M4_PRINTF_ATTR(2, 3); + + const char* GetFormatName(HLSLBaseType bufferOrTextureType, HLSLBaseType formatType); + bool CanSkipWrittenStatement(const HLSLStatement* statement) const; + +private: + CodeWriter m_writer; + + const HLSLTree* m_tree; + const char* m_entryName; + HLSLTarget m_target; + bool m_isInsideBuffer; + bool m_error; + HLSLOptions m_options; +}; + +} //namespace M4 diff --git a/hlslparser/src/HLSLParser.cpp b/hlslparser/src/HLSLParser.cpp new file mode 100644 index 00000000..46580c57 --- /dev/null +++ b/hlslparser/src/HLSLParser.cpp @@ -0,0 +1,4705 @@ +//============================================================================= +// +// Render/HLSLParser.cpp +// +// Created by Max McGuire (max@unknownworlds.com) +// Copyright (c) 2013, Unknown Worlds Entertainment, Inc. +// +//============================================================================= + +#include "HLSLParser.h" + +#include "Engine.h" +#include "HLSLTree.h" + +#ifdef _WIN32 +#include // for alloca +#endif +#include +#include + +// stl +#include +#include +#include + +namespace M4 { + +enum CompareFunctionsResult { + FunctionsEqual, + Function1Better, + Function2Better +}; + +enum CoreType { + CoreType_None, + + CoreType_Scalar, + CoreType_Vector, + CoreType_Matrix, + + CoreType_Sampler, + CoreType_Texture, + CoreType_Struct, + CoreType_Void, + CoreType_Expression, + CoreType_Comment, + CoreType_Buffer, + + CoreType_Count // must be last +}; + +enum DimensionType { + DimensionType_None, + + DimensionType_Scalar, + + DimensionType_Vector2, + DimensionType_Vector3, + DimensionType_Vector4, + + DimensionType_Matrix2x2, + DimensionType_Matrix3x3, + DimensionType_Matrix4x4, + + //DimensionType_Matrix4x3, // TODO: no 3x4 + //DimensionType_Matrix4x2 +}; + +// Can use this to break apart type to useful constructs +struct BaseTypeDescription { + const char* typeName = ""; + const char* typeNameMetal = ""; + + HLSLBaseType baseType = HLSLBaseType_Unknown; + CoreType coreType = 
CoreType_None; + DimensionType dimensionType = DimensionType_None; + NumericType numericType = NumericType_NaN; + + // TODO: is this useful ? + // int numDimensions; // scalar = 0, vector = 1, matrix = 2 + uint8_t numDimensions = 0; + uint8_t numComponents = 0; + uint8_t height = 0; + + int8_t binaryOpRank = -1; // or was this supposed to be max (-1 in uint8_t) +}; + +// really const +extern BaseTypeDescription baseTypeDescriptions[HLSLBaseType_Count]; + +bool IsSamplerType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Sampler; +} + +bool IsMatrixType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Matrix; +} + +bool IsVectorType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Vector; +} + +bool IsScalarType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Scalar; +} + +bool IsTextureType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Texture; +} + +bool IsDepthTextureType(HLSLBaseType baseType) +{ + // return baseTypeDescriptions[baseType].coreType == CoreType_DepthTexture; + return baseType == HLSLBaseType_Depth2D || + baseType == HLSLBaseType_Depth2DArray || + baseType == HLSLBaseType_DepthCube; +} + +bool IsBufferType(HLSLBaseType baseType) +{ + return baseTypeDescriptions[baseType].coreType == CoreType_Buffer; +} + +bool IsCoreTypeEqual(HLSLBaseType lhsType, HLSLBaseType rhsType) +{ + return baseTypeDescriptions[lhsType].coreType == + baseTypeDescriptions[rhsType].coreType; +} + +bool IsDimensionEqual(HLSLBaseType lhsType, HLSLBaseType rhsType) +{ + return baseTypeDescriptions[lhsType].numComponents == + baseTypeDescriptions[rhsType].numComponents && + baseTypeDescriptions[lhsType].height == + baseTypeDescriptions[rhsType].height; +} + +bool IsCrossDimensionEqual(HLSLBaseType lhsType, HLSLBaseType rhsType) +{ + return baseTypeDescriptions[lhsType].height == + 
baseTypeDescriptions[rhsType].numComponents; +} + +bool IsNumericTypeEqual(HLSLBaseType lhsType, HLSLBaseType rhsType) +{ + return baseTypeDescriptions[lhsType].numericType == + baseTypeDescriptions[rhsType].numericType; +} + +// TODO: with so many types, should just request the numeric type +bool IsHalf(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Half; +} + +bool IsFloat(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Float; +} + +bool IsDouble(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Double; +} + +bool IsFloatingType(HLSLBaseType type) +{ + NumericType n = baseTypeDescriptions[type].numericType; + return n == NumericType_Half || n == NumericType_Float || n == NumericType_Double; +} + +bool IsIntegerType(HLSLBaseType type) +{ + NumericType n = baseTypeDescriptions[type].numericType; + return n == NumericType_Int || n == NumericType_Uint || + n == NumericType_Short || n == NumericType_Ushort || + n == NumericType_Long || n == NumericType_Ulong; +} + +bool IsInt(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Int; +} + +bool IsUint(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Uint; +} + +bool IsShort(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Short; +} +bool IsUshort(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Ushort; +} + +bool IsLong(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Long; +} +bool IsUlong(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Ulong; +} + +bool IsBool(HLSLBaseType type) +{ + return baseTypeDescriptions[type].numericType == NumericType_Bool; +} + +bool IsSamplerType(const HLSLType& type) +{ + return IsSamplerType(type.baseType); +} + +bool IsScalarType(const HLSLType& type) +{ + return 
IsScalarType(type.baseType); +} + +bool IsVectorType(const HLSLType& type) +{ + return IsVectorType(type.baseType); +} + +bool IsMatrixType(const HLSLType& type) +{ + return IsMatrixType(type.baseType); +} + +bool IsTextureType(const HLSLType& type) +{ + return IsTextureType(type.baseType); +} + +bool IsNumericType(HLSLBaseType baseType) +{ + return IsVectorType(baseType) || IsScalarType(baseType) || IsMatrixType(baseType); +} + +HLSLBufferType ConvertTokenToBufferType(HLSLToken token) +{ + HLSLBufferType type = HLSLBufferType_CBuffer; + + switch (token) { + // DX9 + case HLSLToken_CBuffer: + type = HLSLBufferType_CBuffer; + break; + case HLSLToken_TBuffer: + type = HLSLBufferType_TBuffer; + break; + + // DX10 + case HLSLToken_ConstantBuffer: + type = HLSLBufferType_ConstantBuffer; + break; + case HLSLToken_StructuredBuffer: + type = HLSLBufferType_StructuredBuffer; + break; + case HLSLToken_RWStructuredBuffer: + type = HLSLBufferType_RWStructuredBuffer; + break; + case HLSLToken_ByteAddressBuffer: + type = HLSLBufferType_ByteAddressBuffer; + break; + case HLSLToken_RWByteAddressBuffer: + type = HLSLBufferType_RWByteAddressBuffer; + break; + + default: + break; + } + + return type; +} + +HLSLBaseType NumericToBaseType(NumericType numericType) +{ + HLSLBaseType baseType = HLSLBaseType_Unknown; + switch (numericType) { + case NumericType_Float: + baseType = HLSLBaseType_Float; + break; + case NumericType_Half: + baseType = HLSLBaseType_Half; + break; + case NumericType_Double: + baseType = HLSLBaseType_Bool; + break; + + case NumericType_Int: + baseType = HLSLBaseType_Int; + break; + case NumericType_Uint: + baseType = HLSLBaseType_Uint; + break; + case NumericType_Ushort: + baseType = HLSLBaseType_Ushort; + break; + case NumericType_Short: + baseType = HLSLBaseType_Short; + break; + case NumericType_Ulong: + baseType = HLSLBaseType_Ulong; + break; + case NumericType_Long: + baseType = HLSLBaseType_Long; + break; + case NumericType_Bool: + baseType = 
HLSLBaseType_Bool; + break; + + // MSL has 8-bit, but HLSL/Vulkan don't + //case NumericType_Uint8: baseType = HLSLBaseType_Uint8; break; + //case NumericType_Int8: baseType = HLSLBaseType_Int8; break; + + default: + break; + } + return baseType; +} + +HLSLBaseType GetScalarType(HLSLBaseType type) +{ + ASSERT(IsNumericType(type)); + return NumericToBaseType(baseTypeDescriptions[type].numericType); +} + +int32_t GetVectorDimension(HLSLBaseType type) +{ + if (IsScalarType(type)) return 1; + if (!IsVectorType(type)) return 0; + + return baseTypeDescriptions[type].numComponents; +} + +HLSLBaseType HalfToFloatBaseType(HLSLBaseType type) +{ + if (IsHalf(type)) + type = (HLSLBaseType)(HLSLBaseType_Float + (type - HLSLBaseType_Half)); + return type; +} + +HLSLBaseType DoubleToFloatBaseType(HLSLBaseType type) +{ + if (IsDouble(type)) + type = (HLSLBaseType)(HLSLBaseType_Float + (type - HLSLBaseType_Double)); + return type; +} + +static HLSLBaseType ArithmeticOpResultType(HLSLBinaryOp binaryOp, HLSLBaseType t1, HLSLBaseType t2); + +const char* GetNumericTypeName(HLSLBaseType type) +{ + if (!IsNumericType(type)) + return nullptr; + + // MSL/HLSL share the same type names + const auto& b = baseTypeDescriptions[type]; + return b.typeName; +} + +HLSLBaseType PromoteType(HLSLBaseType toType, HLSLBaseType type) +{ + return HLSLBaseType(NumericToBaseType(baseTypeDescriptions[type].numericType) + + baseTypeDescriptions[type].dimensionType - DimensionType_Scalar); +} + +/** This structure stores an HLSLFunction-like declaration for an intrinsic function (at most 4 arguments) */ +struct Intrinsic { + explicit Intrinsic(const char* name, uint32_t numArgs) + { + function.name = name; + function.numArguments = numArgs; + + if (numArgs == 0) return; + + for (uint32_t i = 0; i < numArgs; ++i) { + argument[i].type.flags = HLSLTypeFlag_Const; + } + } + + void ChainArgumentPointers() + { + function.argument = argument + 0; + + uint32_t numArgs = function.numArguments; + // This chain of pointers won't survive a copy + 
for (uint32_t i = 0; i < numArgs; ++i) { + if (i < numArgs - 1) + argument[i].nextArgument = argument + i + 1; + } + } + + void SetArgumentTypes(HLSLBaseType returnType, HLSLBaseType args[4]) + { + function.returnType.baseType = returnType; + for (uint32_t i = 0; i < function.numArguments; ++i) { + ASSERT(args[i] != HLSLBaseType_Unknown); + argument[i].type.baseType = args[i]; + } + } + + void ArgsToArray(HLSLBaseType args[4], uint32_t& numArgs, HLSLBaseType arg1, HLSLBaseType arg2, HLSLBaseType arg3, HLSLBaseType arg4) + { + numArgs = 0; + if (arg1 == HLSLBaseType_Unknown) return; + args[numArgs++] = arg1; + if (arg2 == HLSLBaseType_Unknown) return; + args[numArgs++] = arg2; + if (arg3 == HLSLBaseType_Unknown) return; + args[numArgs++] = arg3; + if (arg4 == HLSLBaseType_Unknown) return; + args[numArgs++] = arg4; + } + + explicit Intrinsic(const char* name, HLSLBaseType returnType, HLSLBaseType arg1 = HLSLBaseType_Unknown, HLSLBaseType arg2 = HLSLBaseType_Unknown, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) + { + function.name = name; + + HLSLBaseType argumentTypes[4]; + uint32_t numArgs = 0; + ArgsToArray(argumentTypes, numArgs, arg1, arg2, arg3, arg4); + + *this = Intrinsic(name, numArgs); + SetArgumentTypes(returnType, argumentTypes); + } + + // TODO: allow member function intrinsics on buffers/textures + HLSLFunction function; + HLSLArgument argument[4]; +}; + +// So many calls are member functions in modern HLSL/MSL. +// This means the parser has to work harder to write out these intrinsics +// since some have default args, and some need level(), bias() wrappers in MSL. +// That complexity is currently hidden away in wrapper C-style calls in ShaderMSL.h. 
+#define USE_MEMBER_FUNCTIONS 1 + +static void AddIntrinsic(const Intrinsic& intrinsic); + +void AddTextureLoadIntrinsic(const char* name, HLSLBaseType returnType, HLSLBaseType textureType, HLSLBaseType uvType, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) +{ +#if USE_MEMBER_FUNCTIONS + Intrinsic i(name, returnType, uvType, arg3, arg4); + i.function.memberType = textureType; // extract formatType from return type +#else +// Intrinsic i(name, returnType, textureType, uvType); +// +// // classify textureType subtype off scalar +// i.argument[0].type.formatType = GetScalarType(returnType); +#endif + + AddIntrinsic(i); +} + +void AddTextureIntrinsic(const char* name, HLSLBaseType returnType, HLSLBaseType textureType, HLSLBaseType uvType, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) +{ +#if USE_MEMBER_FUNCTIONS + Intrinsic i(name, returnType, HLSLBaseType_SamplerState, uvType, arg3, arg4); + i.function.memberType = textureType; +#else +// Intrinsic i(name, returnType, textureType, HLSLBaseType_SamplerState, uvType); +// +// // classify textureType subtype off scalar +// i.argument[0].type.formatType = GetScalarType(returnType); +#endif + + AddIntrinsic(i); +} + +void AddTextureIntrinsics(const char* name, HLSLBaseType textureType, HLSLBaseType uvType, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) +{ + AddTextureIntrinsic(name, HLSLBaseType_Float4, textureType, uvType, arg3, arg4); + AddTextureIntrinsic(name, HLSLBaseType_Half4, textureType, uvType, arg3, arg4); +} + +// DepthCmp takes additional arg for comparison value, but this rolls it into uv +void AddDepthIntrinsic(const char* name, HLSLBaseType returnType, HLSLBaseType textureType, HLSLBaseType uvType, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) +{ + // ComparisonState is only for SampleCmp/GatherCmp + bool isCompare = String_Equal(name, "GatherCmp") || 
String_Equal(name, "SampleCmp"); + HLSLBaseType samplerType = isCompare ? HLSLBaseType_SamplerComparisonState : HLSLBaseType_SamplerState; + +#if USE_MEMBER_FUNCTIONS + Intrinsic i(name, returnType, samplerType, uvType, arg3, arg4); + i.function.memberType = textureType; +#else +// Intrinsic i(name, returnType, textureType, samplerType, uvType); +// i.argument[0].type.formatType = GetScalarType(returnType); +#endif + + AddIntrinsic(i); +} + +// TODO: elim the H version once have member functions, can check the member textuer format. +//#define TEXTURE_INTRINSIC_FUNCTION(name, textureType, uvType) \ +// AddTextureIntrinsic( name, HLSLBaseType_Float4, textureType, uvType) \ +// AddTextureIntrinsic( name, HLSLBaseType_Half4, textureType, uvType ) + +static const int _numberTypeRank[NumericType_Count][NumericType_Count] = + { + // across is what type list on right is converted into (5 means don't, 0 means best) + //F H D B I UI S US L UL + {0, 3, 3, 4, 4, 4, 4, 4, 4, 4}, // NumericType_Float + {2, 0, 4, 4, 4, 4, 4, 4, 4, 4}, // NumericType_Half + {1, 4, 0, 4, 4, 4, 4, 4, 4, 4}, // NumericType_Double + + {5, 5, 5, 0, 5, 5, 5, 5, 5, 5}, // NumericType_Bool + {5, 5, 5, 4, 0, 3, 4, 3, 5, 5}, // NumericType_Int + {5, 5, 5, 4, 2, 0, 3, 4, 5, 5}, // NumericType_Uint + {5, 5, 5, 4, 0, 3, 0, 5, 5, 5}, // NumericType_Short + {5, 5, 5, 4, 2, 0, 5, 0, 5, 5}, // NumericType_Ushort + + {5, 5, 5, 4, 0, 3, 5, 5, 0, 5}, // NumericType_Long + {5, 5, 5, 4, 2, 0, 5, 5, 5, 0}, // NumericType_Ulong +}; + +/* All FX state +struct EffectStateValue +{ + const char * name; + int value; +}; + +static const EffectStateValue textureFilteringValues[] = { + {"None", 0}, + {"Point", 1}, + {"Linear", 2}, + {"Anisotropic", 3}, + {NULL, 0} +}; + +static const EffectStateValue textureAddressingValues[] = { + {"Wrap", 1}, + {"Mirror", 2}, + {"Clamp", 3}, + {"Border", 4}, + {"MirrorOnce", 5}, + {NULL, 0} +}; + +static const EffectStateValue booleanValues[] = { + {"False", 0}, + {"True", 1}, + {NULL, 0} +}; 
+ +static const EffectStateValue cullValues[] = { + {"None", 1}, + {"CW", 2}, + {"CCW", 3}, + {NULL, 0} +}; + +static const EffectStateValue cmpValues[] = { + {"Never", 1}, + {"Less", 2}, + {"Equal", 3}, + {"LessEqual", 4}, + {"Greater", 5}, + {"NotEqual", 6}, + {"GreaterEqual", 7}, + {"Always", 8}, + {NULL, 0} +}; + +static const EffectStateValue blendValues[] = { + {"Zero", 1}, + {"One", 2}, + {"SrcColor", 3}, + {"InvSrcColor", 4}, + {"SrcAlpha", 5}, + {"InvSrcAlpha", 6}, + {"DestAlpha", 7}, + {"InvDestAlpha", 8}, + {"DestColor", 9}, + {"InvDestColor", 10}, + {"SrcAlphaSat", 11}, + {"BothSrcAlpha", 12}, + {"BothInvSrcAlpha", 13}, + {"BlendFactor", 14}, + {"InvBlendFactor", 15}, + {"SrcColor2", 16}, // Dual source blending. D3D9Ex only. + {"InvSrcColor2", 17}, + {NULL, 0} +}; + +static const EffectStateValue blendOpValues[] = { + {"Add", 1}, + {"Subtract", 2}, + {"RevSubtract", 3}, + {"Min", 4}, + {"Max", 5}, + {NULL, 0} +}; + +static const EffectStateValue fillModeValues[] = { + {"Point", 1}, + {"Wireframe", 2}, + {"Solid", 3}, + {NULL, 0} +}; + +static const EffectStateValue stencilOpValues[] = { + {"Keep", 1}, + {"Zero", 2}, + {"Replace", 3}, + {"IncrSat", 4}, + {"DecrSat", 5}, + {"Invert", 6}, + {"Incr", 7}, + {"Decr", 8}, + {NULL, 0} +}; + +// These are flags. 
+static const EffectStateValue colorMaskValues[] = { + {"False", 0}, + {"Red", 1<<0}, + {"Green", 1<<1}, + {"Blue", 1<<2}, + {"Alpha", 1<<3}, + {"X", 1<<0}, + {"Y", 1<<1}, + {"Z", 1<<2}, + {"W", 1<<3}, + {NULL, 0} +}; + +static const EffectStateValue integerValues[] = { + {NULL, 0} +}; + +static const EffectStateValue floatValues[] = { + {NULL, 0} +}; + + +struct EffectState +{ + const char * name; + int d3drs; + const EffectStateValue * values; +}; + +static const EffectState samplerStates[] = { + {"AddressU", 1, textureAddressingValues}, + {"AddressV", 2, textureAddressingValues}, + {"AddressW", 3, textureAddressingValues}, + // limited choices for bordercolor on mobile, so assume transparent + // "BorderColor", 4, D3DCOLOR + {"MagFilter", 5, textureFilteringValues}, + {"MinFilter", 6, textureFilteringValues}, + {"MipFilter", 7, textureFilteringValues}, + {"MipMapLodBias", 8, floatValues}, + // TODO: also MinMipLevel + {"MaxMipLevel", 9, integerValues}, + {"MaxAnisotropy", 10, integerValues}, + // Format conveys this now {"sRGBTexture", 11, booleanValues}, +}; + +// can set these states in an Effect block from FX files +static const EffectState effectStates[] = { + {"VertexShader", 0, NULL}, + {"PixelShader", 0, NULL}, + {"AlphaBlendEnable", 27, booleanValues}, + {"SrcBlend", 19, blendValues}, + {"DestBlend", 20, blendValues}, + {"BlendOp", 171, blendOpValues}, + {"SeparateAlphaBlendEanble", 206, booleanValues}, + {"SrcBlendAlpha", 207, blendValues}, + {"DestBlendAlpha", 208, blendValues}, + {"BlendOpAlpha", 209, blendOpValues}, + {"AlphaTestEnable", 15, booleanValues}, + {"AlphaRef", 24, integerValues}, + {"AlphaFunc", 25, cmpValues}, + {"CullMode", 22, cullValues}, + {"ZEnable", 7, booleanValues}, + {"ZWriteEnable", 14, booleanValues}, + {"ZFunc", 23, cmpValues}, + {"StencilEnable", 52, booleanValues}, + {"StencilFail", 53, stencilOpValues}, + {"StencilZFail", 54, stencilOpValues}, + {"StencilPass", 55, stencilOpValues}, + {"StencilFunc", 56, cmpValues}, + 
{"StencilRef", 57, integerValues}, + {"StencilMask", 58, integerValues}, + {"StencilWriteMask", 59, integerValues}, + {"TwoSidedStencilMode", 185, booleanValues}, + {"CCW_StencilFail", 186, stencilOpValues}, + {"CCW_StencilZFail", 187, stencilOpValues}, + {"CCW_StencilPass", 188, stencilOpValues}, + {"CCW_StencilFunc", 189, cmpValues}, + {"ColorWriteEnable", 168, colorMaskValues}, + {"FillMode", 8, fillModeValues}, + {"MultisampleAlias", 161, booleanValues}, + {"MultisampleMask", 162, integerValues}, + {"ScissorTestEnable", 174, booleanValues}, + {"SlopeScaleDepthBias", 175, floatValues}, + {"DepthBias", 195, floatValues} +}; + + +static const EffectStateValue witnessCullModeValues[] = { + {"None", 0}, + {"Back", 1}, + {"Front", 2}, + {NULL, 0} +}; + +static const EffectStateValue witnessFillModeValues[] = { + {"Solid", 0}, + {"Wireframe", 1}, + {NULL, 0} +}; + +static const EffectStateValue witnessBlendModeValues[] = { + {"Disabled", 0}, + {"AlphaBlend", 1}, // src * a + dst * (1-a) + {"Add", 2}, // src + dst + {"Mixed", 3}, // src + dst * (1-a) + {"Multiply", 4}, // src * dst + {"Multiply2", 5}, // 2 * src * dst + {NULL, 0} +}; + +static const EffectStateValue witnessDepthFuncValues[] = { + {"LessEqual", 0}, + {"Less", 1}, + {"Equal", 2}, + {"Greater", 3}, + {"Always", 4}, + {NULL, 0} +}; + +static const EffectStateValue witnessStencilModeValues[] = { + {"Disabled", 0}, + {"Set", 1}, + {"Test", 2}, + {NULL, 0} +}; + +static const EffectState pipelineStates[] = { + {"VertexShader", 0, NULL}, + {"PixelShader", 0, NULL}, + + // Depth_Stencil_State + {"DepthWrite", 0, booleanValues}, + {"DepthEnable", 0, booleanValues}, + {"DepthFunc", 0, witnessDepthFuncValues}, + {"StencilMode", 0, witnessStencilModeValues}, + + // Raster_State + {"CullMode", 0, witnessCullModeValues}, + {"FillMode", 0, witnessFillModeValues}, + {"MultisampleEnable", 0, booleanValues}, + {"PolygonOffset", 0, booleanValues}, + + // Blend_State + {"BlendMode", 0, witnessBlendModeValues}, + 
{"ColorWrite", 0, booleanValues}, + {"AlphaWrite", 0, booleanValues}, + {"AlphaTest", 0, booleanValues}, // This is really alpha to coverage. +}; +*/ + +// Note: these strings need to live until end of the app +StringPool gStringPool(NULL); + +enum All { + AllHalf = (1 << 0), + AllFloat = (1 << 1), + AllDouble = (1 << 2), + + AllFloats = AllHalf | AllFloat | AllDouble, + + AllUint = (1 << 3), + AllInt = (1 << 4), + AllShort = (1 << 5), + AllUshort = (1 << 6), + AllLong = (1 << 7), + AllUlong = (1 << 8), + AllBool = (1 << 9), + + AllInts = AllUint | AllInt | AllShort | AllUshort | AllLong | AllUlong | AllBool, + + //AllScalar = (1<<15), + AllVecs = (1 << 16), + AllMats = (1 << 17), + AllDims = AllVecs | AllMats, +}; +using AllMask = uint32_t; + +// TODO: want to use Array, but it needs Allocator passed +struct Range { + uint32_t start; + uint32_t count; +}; +using IntrinsicRangeMap = std::unordered_map; + +static std::vector _intrinsics; + +// This will help with comparison to avoid O(n) search of all 5000 intrinsics +static IntrinsicRangeMap _intrinsicRangeMap; + +static void AddIntrinsic(const Intrinsic& intrinsic) +{ + const char* name = intrinsic.function.name; + + // Put in string pool since using this as a key. Also means equals just ptr compar. 
+ name = gStringPool.AddString(name); + + // track intrinsic range in a map, also the name lookup helps speed the parser up + auto it = _intrinsicRangeMap.find(name); + if (it != _intrinsicRangeMap.end()) { + it->second.count++; + } + else { + _intrinsicRangeMap[name] = {(uint32_t)_intrinsics.size(), 1}; + } + + // To avoid having growth destroy the argument chains + const uint32_t kMaxIntrinsics = 10000; // TODO: reduce once count is known + if (_intrinsics.empty()) + _intrinsics.reserve(kMaxIntrinsics); + ASSERT(_intrinsics.size() < kMaxIntrinsics); + + _intrinsics.push_back(intrinsic); + _intrinsics.back().function.name = name; + + // These pointers change when copied or when vector grows, so do a reserve + _intrinsics.back().ChainArgumentPointers(); +} + +void AddIntrinsic(const char* name, HLSLBaseType returnType, HLSLBaseType arg1 = HLSLBaseType_Unknown, HLSLBaseType arg2 = HLSLBaseType_Unknown, HLSLBaseType arg3 = HLSLBaseType_Unknown, HLSLBaseType arg4 = HLSLBaseType_Unknown) +{ + Intrinsic intrinsic(name, returnType, arg1, arg2, arg3, arg4); + AddIntrinsic(intrinsic); +} + +void RegisterBaseTypeIntrinsic(Intrinsic& intrinsic, uint32_t numArgs, HLSLBaseType returnType, HLSLBaseType baseType, uint32_t start, uint32_t end) +{ + HLSLBaseType args[4] = {}; + + for (uint32_t i = start; i < end; ++i) { + HLSLBaseType baseTypeIter = (HLSLBaseType)(baseType + i); + + HLSLBaseType newReturnType = (returnType == HLSLBaseType_Unknown) ? 
baseTypeIter : returnType; + + for (uint32_t a = 0; a < numArgs; ++a) + args[a] = baseTypeIter; + + intrinsic.SetArgumentTypes(newReturnType, args); + AddIntrinsic(intrinsic); + } +} + +inline bool TestBits(AllMask mask, AllMask maskTest) +{ + return (mask & maskTest) == maskTest; +} + +void RegisterIntrinsics(const char* name, uint32_t numArgs, AllMask mask, HLSLBaseType returnType = HLSLBaseType_Unknown) +{ + Intrinsic intrinsic(name, numArgs); + + { + const uint32_t kNumTypes = 3; + HLSLBaseType baseTypes[kNumTypes] = {HLSLBaseType_Float, HLSLBaseType_Half, HLSLBaseType_Double}; + + bool skip[kNumTypes] = {}; + if (!TestBits(mask, AllFloat)) + skip[0] = true; + if (!TestBits(mask, AllHalf)) + skip[1] = true; + if (!TestBits(mask, AllDouble)) + skip[2] = true; + + for (uint32_t i = 0; i < kNumTypes; ++i) { + if (skip[i]) continue; + HLSLBaseType baseType = baseTypes[i]; + + if (mask & AllVecs) + RegisterBaseTypeIntrinsic(intrinsic, numArgs, returnType, baseType, 0, 4); + if (mask & AllMats) + RegisterBaseTypeIntrinsic(intrinsic, numArgs, returnType, baseType, 4, 7); + } + } + + if ((mask & AllInts) == AllInts) { + const uint32_t kNumTypes = 7; + HLSLBaseType baseTypes[kNumTypes] = { + HLSLBaseType_Long, HLSLBaseType_Ulong, + HLSLBaseType_Int, HLSLBaseType_Uint, + HLSLBaseType_Short, HLSLBaseType_Ushort, + HLSLBaseType_Bool}; + + bool skip[kNumTypes] = {}; + if (!TestBits(mask, AllLong)) + skip[0] = true; + if (!TestBits(mask, AllUlong)) + skip[1] = true; + if (!TestBits(mask, AllInt)) + skip[2] = true; + if (!TestBits(mask, AllUint)) + skip[3] = true; + if (!TestBits(mask, AllShort)) + skip[4] = true; + if (!TestBits(mask, AllUshort)) + skip[5] = true; + if (!TestBits(mask, AllBool)) + skip[6] = true; + + for (uint32_t i = 0; i < kNumTypes; ++i) { + if (skip[i]) continue; + HLSLBaseType baseType = baseTypes[i]; + + if (mask & AllVecs) + RegisterBaseTypeIntrinsic(intrinsic, numArgs, returnType, baseType, 0, 4); + + // TODO: No int matrices yet, but could add them 
+ //if (mask & AllMats) + // RegisterBaseTypeIntrinsic(intrinsic, numArgs, returnType, 4, 7); + } + } +} + +#define ArrayCount(array) (sizeof(array) / sizeof(array[0])) + +bool InitIntrinsics() +{ + // Note that none of these need to be in alphabetical order + // since an unordered map is used for lookup. But do need + // all intrinsics of the same name to be defined together in + // a single range. + + const char* kVecOps1[] = { + "acos", "asin", "atan", + "cos", "sin", "tan", + "cosh", "sinh", "tanh", + "floor", "ceil", "frac", "fmod", "round", "trunc", + "normalize", "sqrt", "rsqrt", "rcp", "saturate", "sign", + "log", "log2", "log10", + "exp", "exp2", + "ddx", "ddy", // ps only + "isnan", "isinf", "isfinite", + "degrees", "radians" // emulated in MSL + }; + + // apply to float/int + const char* kVecOps1All[] = { + "abs", + }; + + const char* kVecOps2[] = { + "atan2", + "pow", // can't pow take scalar? + "step", + "frexp", + }; + + // apply to float/int + const char* kVecOps2All[] = { + "min", + "max", + }; + + const char* kVecOps3[] = { + "lerp", // can clamp and lerp take a scalar for last args/arg? 
+ "smoothstep", + "fma", + }; + + // apply to float/int + const char* kVecOps3All[] = { + "clamp", + "min3", + "max3", + }; + + // HLSL intrinsics + // + // not going to support due to swizzle, just have similar routine for half + // D3DCOLORtoUBYTE4(x) // does nasty bgra swizzle, so have to convert back + // r0.xyzw = float4(255.001953,255.001953,255.001953,255.001953) * r0.zyxw; + // ro0.xyzw = (int4)r0.xyzw; + // + // there's already toint/tuint, but those don't normalize + // + // ddx_coarse/fine, ddy_coarse/fine + // msad4 + // printf, errorf + // + // faceforward = -n * sign(dot(i, ng)) + // no "exp10" in HLSL, but is in MSL + //--------------------- + // MSL intrinsics + // median3(x,y,z) + // select(x,y,z) + // addsat, subsat, rotate, + // absdiff, hadd(x,y), + // is_null_texture(tex) + // tex.fence() + + AllMask mask = AllFloats | AllVecs; + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps1); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps1[i], 1, mask); + } + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps2); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps2[i], 2, mask); + } + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps3); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps3[i], 3, mask); + } + + mask = AllFloats | AllInts | AllVecs; + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps1All); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps1All[i], 1, mask); + } + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps2All); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps2All[i], 2, mask); + } + for (uint32_t i = 0, iEnd = ArrayCount(kVecOps3All); i < iEnd; ++i) { + RegisterIntrinsics(kVecOps3All[i], 3, mask); + } + + // bit counting + RegisterIntrinsics("countbits", 1, AllInts | AllVecs); // popcount in MSL + RegisterIntrinsics("firstbithigh", 1, AllInts | AllVecs); // clz in MSL + RegisterIntrinsics("firstbitlow", 1, AllInts | AllVecs); // ctz in MSL + RegisterIntrinsics("reversebits", 1, AllInts | AllVecs); // reverse_bits in MSL + + RegisterIntrinsics("sincos", 2, AllFloats | AllVecs, 
HLSLBaseType_Void); + + RegisterIntrinsics("mad", 3, AllFloats | AllVecs); + + RegisterIntrinsics("any", 1, AllFloats | AllInts | AllVecs, HLSLBaseType_Bool); + RegisterIntrinsics("all", 1, AllFloats | AllInts | AllVecs, HLSLBaseType_Bool); + + RegisterIntrinsics("clip", 1, AllFloats | AllVecs, HLSLBaseType_Void); + + RegisterIntrinsics("dot", 2, AllHalf | AllVecs, HLSLBaseType_Half); + RegisterIntrinsics("dot", 2, AllFloat | AllVecs, HLSLBaseType_Float); + RegisterIntrinsics("dot", 2, AllDouble | AllVecs, HLSLBaseType_Double); + + // 3d cross product only + AddIntrinsic("cross", HLSLBaseType_Float3, HLSLBaseType_Float3, HLSLBaseType_Float3); + AddIntrinsic("cross", HLSLBaseType_Half3, HLSLBaseType_Half3, HLSLBaseType_Half3); + AddIntrinsic("cross", HLSLBaseType_Double3, HLSLBaseType_Double3, HLSLBaseType_Double3); + + AddIntrinsic("reflect", HLSLBaseType_Float3, HLSLBaseType_Float3, HLSLBaseType_Float3); + AddIntrinsic("reflect", HLSLBaseType_Half3, HLSLBaseType_Half3, HLSLBaseType_Half3); + AddIntrinsic("reflect", HLSLBaseType_Double3, HLSLBaseType_Double3, HLSLBaseType_Double3); + + AddIntrinsic("refract", HLSLBaseType_Float3, HLSLBaseType_Float3, HLSLBaseType_Float3, HLSLBaseType_Float); + AddIntrinsic("refract", HLSLBaseType_Half3, HLSLBaseType_Half3, HLSLBaseType_Half3, HLSLBaseType_Half); + AddIntrinsic("refract", HLSLBaseType_Double3, HLSLBaseType_Double3, HLSLBaseType_Double3, HLSLBaseType_Double); + + RegisterIntrinsics("length", 1, AllHalf | AllVecs, HLSLBaseType_Half); + RegisterIntrinsics("length", 1, AllFloat | AllVecs, HLSLBaseType_Float); + RegisterIntrinsics("length", 1, AllDouble | AllVecs, HLSLBaseType_Double); + + // MSL construct + RegisterIntrinsics("length_squared", 1, AllHalf | AllVecs, HLSLBaseType_Half); + RegisterIntrinsics("length_squared", 1, AllFloat | AllVecs, HLSLBaseType_Float); + RegisterIntrinsics("length_squared", 1, AllDouble | AllVecs, HLSLBaseType_Double); + + RegisterIntrinsics("distance", 2, AllHalf | AllVecs, 
HLSLBaseType_Half); + RegisterIntrinsics("distance", 2, AllFloat | AllVecs, HLSLBaseType_Float); + RegisterIntrinsics("distance", 2, AllDouble | AllVecs, HLSLBaseType_Double); + + RegisterIntrinsics("distance_squared", 2, AllHalf | AllVecs, HLSLBaseType_Half); + RegisterIntrinsics("distance_squared", 2, AllFloat | AllVecs, HLSLBaseType_Float); + RegisterIntrinsics("distance_squared", 2, AllDouble | AllVecs, HLSLBaseType_Double); + + // ps only + AddIntrinsic("fwidth", HLSLBaseType_Float, HLSLBaseType_Float2, HLSLBaseType_Float2); + + // scalar/vec ops + RegisterIntrinsics("mul", 2, AllFloat | AllVecs | AllMats); + + // scalar mul, since * isn't working on Metal properly + // m = s * m + AddIntrinsic("mul", HLSLBaseType_Float2x2, HLSLBaseType_Float, HLSLBaseType_Float2x2); + AddIntrinsic("mul", HLSLBaseType_Float3x3, HLSLBaseType_Float, HLSLBaseType_Float3x3); + AddIntrinsic("mul", HLSLBaseType_Float4x4, HLSLBaseType_Float, HLSLBaseType_Float4x4); + AddIntrinsic("mul", HLSLBaseType_Float2x2, HLSLBaseType_Float2x2, HLSLBaseType_Float); + AddIntrinsic("mul", HLSLBaseType_Float3x3, HLSLBaseType_Float3x3, HLSLBaseType_Float); + AddIntrinsic("mul", HLSLBaseType_Float4x4, HLSLBaseType_Float4x4, HLSLBaseType_Float); + + // v = v * m + AddIntrinsic("mul", HLSLBaseType_Float2, HLSLBaseType_Float2, HLSLBaseType_Float2x2); + AddIntrinsic("mul", HLSLBaseType_Float3, HLSLBaseType_Float3, HLSLBaseType_Float3x3); + AddIntrinsic("mul", HLSLBaseType_Float4, HLSLBaseType_Float4, HLSLBaseType_Float4x4); + AddIntrinsic("mul", HLSLBaseType_Float2, HLSLBaseType_Float2x2, HLSLBaseType_Float2); + AddIntrinsic("mul", HLSLBaseType_Float3, HLSLBaseType_Float3x3, HLSLBaseType_Float3); + AddIntrinsic("mul", HLSLBaseType_Float4, HLSLBaseType_Float4x4, HLSLBaseType_Float4); + + // m = s * m + AddIntrinsic("mul", HLSLBaseType_Half2x2, HLSLBaseType_Half, HLSLBaseType_Half2x2); + AddIntrinsic("mul", HLSLBaseType_Half3x3, HLSLBaseType_Half, HLSLBaseType_Half3x3); + AddIntrinsic("mul", 
HLSLBaseType_Half4x4, HLSLBaseType_Half, HLSLBaseType_Half4x4); + AddIntrinsic("mul", HLSLBaseType_Half2x2, HLSLBaseType_Half2x2, HLSLBaseType_Half); + AddIntrinsic("mul", HLSLBaseType_Half3x3, HLSLBaseType_Half3x3, HLSLBaseType_Half); + AddIntrinsic("mul", HLSLBaseType_Half4x4, HLSLBaseType_Half4x4, HLSLBaseType_Half); + + // v = v * m + AddIntrinsic("mul", HLSLBaseType_Half2, HLSLBaseType_Half2, HLSLBaseType_Half2x2); + AddIntrinsic("mul", HLSLBaseType_Half3, HLSLBaseType_Half3, HLSLBaseType_Half3x3); + AddIntrinsic("mul", HLSLBaseType_Half4, HLSLBaseType_Half4, HLSLBaseType_Half4x4); + AddIntrinsic("mul", HLSLBaseType_Half2, HLSLBaseType_Half2x2, HLSLBaseType_Half2); + AddIntrinsic("mul", HLSLBaseType_Half3, HLSLBaseType_Half3x3, HLSLBaseType_Half3); + AddIntrinsic("mul", HLSLBaseType_Half4, HLSLBaseType_Half4x4, HLSLBaseType_Half4); + + // m = s * m + AddIntrinsic("mul", HLSLBaseType_Double2x2, HLSLBaseType_Double, HLSLBaseType_Double2x2); + AddIntrinsic("mul", HLSLBaseType_Double3x3, HLSLBaseType_Double, HLSLBaseType_Double3x3); + AddIntrinsic("mul", HLSLBaseType_Double4x4, HLSLBaseType_Double, HLSLBaseType_Double4x4); + AddIntrinsic("mul", HLSLBaseType_Double2x2, HLSLBaseType_Double2x2, HLSLBaseType_Double); + AddIntrinsic("mul", HLSLBaseType_Double3x3, HLSLBaseType_Double3x3, HLSLBaseType_Double); + AddIntrinsic("mul", HLSLBaseType_Double4x4, HLSLBaseType_Double4x4, HLSLBaseType_Double); + + // v = v * m + AddIntrinsic("mul", HLSLBaseType_Double2, HLSLBaseType_Double2, HLSLBaseType_Double2x2); + AddIntrinsic("mul", HLSLBaseType_Double3, HLSLBaseType_Double3, HLSLBaseType_Double3x3); + AddIntrinsic("mul", HLSLBaseType_Double4, HLSLBaseType_Double4, HLSLBaseType_Double4x4); + AddIntrinsic("mul", HLSLBaseType_Double2, HLSLBaseType_Double2x2, HLSLBaseType_Double2); + AddIntrinsic("mul", HLSLBaseType_Double3, HLSLBaseType_Double3x3, HLSLBaseType_Double3); + AddIntrinsic("mul", HLSLBaseType_Double4, HLSLBaseType_Double4x4, HLSLBaseType_Double4); + + // matrix 
transpose + RegisterIntrinsics("transpose", 1, AllFloats | AllMats); + + // determinant needs to return scalar for all 9 mat types + AddIntrinsic("determinant", HLSLBaseType_Float, HLSLBaseType_Float2x2); + AddIntrinsic("determinant", HLSLBaseType_Float, HLSLBaseType_Float3x3); + AddIntrinsic("determinant", HLSLBaseType_Float, HLSLBaseType_Float4x4); + AddIntrinsic("determinant", HLSLBaseType_Half, HLSLBaseType_Half2x2); + AddIntrinsic("determinant", HLSLBaseType_Half, HLSLBaseType_Half3x3); + AddIntrinsic("determinant", HLSLBaseType_Half, HLSLBaseType_Half4x4); + AddIntrinsic("determinant", HLSLBaseType_Double, HLSLBaseType_Double2x2); + AddIntrinsic("determinant", HLSLBaseType_Double, HLSLBaseType_Double3x3); + AddIntrinsic("determinant", HLSLBaseType_Double, HLSLBaseType_Double4x4); + +#if 0 + // TODO: more conversions fp16, double, etc. + // MSL can just do simple casts. These are more for getting data in/out + // of shader stages, or out of DataAddressBuffer which has single type. + AddIntrinsic("asuint", HLSLBaseType_Uint, HLSLBaseType_Float); + AddIntrinsic("asuint", HLSLBaseType_Uint, HLSLBaseType_Double); + AddIntrinsic("asuint", HLSLBaseType_Uint, HLSLBaseType_Half); + + AddIntrinsic("asfloat", HLSLBaseType_Float, HLSLBaseType_Int ); + + // AddIntrinsic("f16tof32", HLSLBaseType_Float, HLSLBaseType_Uint ); // lower 16-bits + // AddIntrinsic("f32tof16", HLSLBaseType_Uint, HLSLBaseType_Float ); + + // "faceforward" Returns -n * sign(dot(i, ng)). + + AddIntrinsic("asint", HLSLBaseType_Uint, HLSLBaseType_Float); + + // low/hi uint + AddIntrinsic("asdouble", HLSLBaseType_Double, HLSLBaseType_Uint, HLSLBaseType_Uint); + + // one for 64-bit too (low/hi uint) + AddIntrinsic("asuint", HLSLBaseType_Ulong, HLSLBaseType_Uint, HLSLBaseType_Uint); + AddIntrinsic("asuint", HLSLBaseType_Uint, HLSLBaseType_Float); +#endif + +#if 0 + // TODO: get atomics working + // these work on atomic_int/uint, then bool/ulong 2.4, + // then sub/add on float in MSL 3.0. 
How does HLSL declare atomic values? + + // How to designate atomics? These have atomic_u/int type in MSL. + // Metal just uses atomic, atomic, ... + + // memory_order_relaxed is only value to pass + atomic_fetch_add_explicit(output, val, memory_order_relaxed); + atomic_fetch_sub_explicit(output, val, memory_order_relaxed); + atomic_fetch_min_explicit(output, val, memory_order_relaxed); + atomic_fetch_max_explicit(output, val, memory_order_relaxed); + atomic_fetch_and_explicit(output, val, memory_order_relaxed); + atomic_fetch_or_explicit(output, val, memory_order_relaxed); + atomic_fetch_xor_explicit(output, val, memory_order_relaxed); + + bool atomic_compare_exchange_weak_explicit(device A* object, + C *expected, C desired, memory_order success, + memory_order failure) + + void atomic_store_explicit(device A* object, C desired, + memory_order order) + + // Here's how to emulate in MSL + void InterlockedAdd(device atomic* dst, uint val, out uint original ) + { + original = atomic_fetch_add_explicit(dst, val, memory_order_relaxed); + } + + AddIntrisic("InterlockedAdd", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedAdd", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + AddIntrisic("InterlockedSub", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedSub", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + AddIntrisic("InterlockedMin", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedMin", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + AddIntrisic("InterlockedMax", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedMax", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + 
AddIntrisic("InterlockedAnd", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedAnd", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + AddIntrisic("InterlockedOr", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedOr", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + AddIntrisic("InterlockedXor", HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int); + AddIntrisic("InterlockedXor", HLSLBaseType_Void, HLSLBaseType_AtomicUint, HLSLBaseType_Uint, HLSLBaseType_Uint); + + // compare dst + compareValue, and store 3rd if same, nothing returned + InterlockCompareStore(HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int) + + // extra param here, last value is always original value + // compare dst + compareValue, store 3rd if same, return original dst + InterlockedCompareExchage(HLSLBaseType_Void, HLSLBaseType_AtomicInt, HLSLBaseType_Int, HLSLBaseType_Int, HLSLBaseType_Int) + +#endif + + // TODO: split off sampler intrinsics from math above + // these need to be member functions and have default arg value support + + //------------------------ + + // TODO: need optional offset + + // Cannot use Sample with 2DMS/Array + AddTextureIntrinsics("Sample", HLSLBaseType_Texture2D, HLSLBaseType_Float2); // Int2 offset + AddTextureIntrinsics("Sample", HLSLBaseType_Texture3D, HLSLBaseType_Float3); // Int3 offset + AddTextureIntrinsics("Sample", HLSLBaseType_Texture2DArray, HLSLBaseType_Float3); // Int2 offset + + // these don't have offset + AddTextureIntrinsics("Sample", HLSLBaseType_TextureCube, HLSLBaseType_Float3); + AddTextureIntrinsics("Sample", HLSLBaseType_TextureCubeArray, HLSLBaseType_Float4); + + // Depth + AddDepthIntrinsic("Sample", HLSLBaseType_Float, HLSLBaseType_Depth2D, HLSLBaseType_Float2); // Int2 offset + 
AddDepthIntrinsic("Sample", HLSLBaseType_Float, HLSLBaseType_Depth2DArray, HLSLBaseType_Float3); // Int2 offset + AddDepthIntrinsic("Sample", HLSLBaseType_Float, HLSLBaseType_DepthCube, HLSLBaseType_Float3); // no offset + + AddDepthIntrinsic("SampleCmp", HLSLBaseType_Float, HLSLBaseType_Depth2D, HLSLBaseType_Float2, HLSLBaseType_Float); + AddDepthIntrinsic("SampleCmp", HLSLBaseType_Float, HLSLBaseType_Depth2DArray, HLSLBaseType_Float3, HLSLBaseType_Float); + AddDepthIntrinsic("SampleCmp", HLSLBaseType_Float, HLSLBaseType_DepthCube, HLSLBaseType_Float3, HLSLBaseType_Float); + + // returns float4 w/comparisons, probably only on mip0 + // TODO: add GatherRed? to read 4 depth values + AddDepthIntrinsic("GatherCmp", HLSLBaseType_Float4, HLSLBaseType_Depth2D, HLSLBaseType_Float2, HLSLBaseType_Float); + AddDepthIntrinsic("GatherCmp", HLSLBaseType_Float4, HLSLBaseType_Depth2DArray, HLSLBaseType_Float3, HLSLBaseType_Float); + AddDepthIntrinsic("GatherCmp", HLSLBaseType_Float4, HLSLBaseType_DepthCube, HLSLBaseType_Float3, HLSLBaseType_Float); + + // one more dimension than Sample + AddTextureIntrinsics("SampleLevel", HLSLBaseType_Texture2D, HLSLBaseType_Float2, HLSLBaseType_Float); + AddTextureIntrinsics("SampleLevel", HLSLBaseType_Texture3D, HLSLBaseType_Float3, HLSLBaseType_Float); + AddTextureIntrinsics("SampleLevel", HLSLBaseType_Texture2DArray, HLSLBaseType_Float3, HLSLBaseType_Float); + AddTextureIntrinsics("SampleLevel", HLSLBaseType_TextureCube, HLSLBaseType_Float3, HLSLBaseType_Float); + // TEXTURE_INTRINSIC_FUNCTION("SampleLevel", HLSLBaseType_TextureCubeArray, HLSLBaseType_Float4, Float); + + // bias always in w + AddTextureIntrinsics("SampleBias", HLSLBaseType_Texture2D, HLSLBaseType_Float2, HLSLBaseType_Float); + AddTextureIntrinsics("SampleBias", HLSLBaseType_Texture3D, HLSLBaseType_Float3, HLSLBaseType_Float); + AddTextureIntrinsics("SampleBias", HLSLBaseType_Texture2DArray, HLSLBaseType_Float3, HLSLBaseType_Float); + + // no offset on cube/cubearray + 
AddTextureIntrinsics("SampleBias", HLSLBaseType_TextureCube, HLSLBaseType_Float3, HLSLBaseType_Float); + // AddTextureIntrinsics("SampleBias", HLSLBaseType_TextureCubeArray, HLSLBaseType_Float4, Float); + + // TODO: for 2D tex (int2 offset is optional, how to indicate that?) + // arguments have defaultValue that can be set. + + AddTextureIntrinsics("GatherRed", HLSLBaseType_Texture2D, HLSLBaseType_Float2); + AddTextureIntrinsics("GatherGreen", HLSLBaseType_Texture2D, HLSLBaseType_Float2); + AddTextureIntrinsics("GatherBlue", HLSLBaseType_Texture2D, HLSLBaseType_Float2); + AddTextureIntrinsics("GatherAlpha", HLSLBaseType_Texture2D, HLSLBaseType_Float2); + + // TODO: add more types cube/3d takes gradient3d in MSL + // The Intrinsic ctor would need to have 5 args instead 4 + // first move to member functions, then add this with 4 args + // AddTextureIntrinsics( "SampleGrad", HLSLBaseType_Texture2D, HLSLBaseType_Float, HLSLBaseType_Float2, HLSLBaseType_Float2, HLSLBaseType_Float2); + + // These constructs are not declaring the lod or offset param which have default + AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_Texture2D, HLSLBaseType_Int2); // TODO: needs lod + AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_Texture3D, HLSLBaseType_Int3); // TODO: need lod + AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_Texture2DArray, HLSLBaseType_Int2); // TODO: needs array, lod + // AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_TextureCube, HLSLBaseType_Int2); // TODO: needs face, lod + // AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_TextureCubeArray, HLSLBaseType_Int2); // TODO: needs face, lod, array + AddTextureLoadIntrinsic("Load", HLSLBaseType_Float4, HLSLBaseType_Texture2DMS, HLSLBaseType_Int2); // TODO: needs sampleIndex + + // TODO: aren't these uint instead of int? 
+ AddTextureIntrinsics("GetDimensions", HLSLBaseType_Texture2D, HLSLBaseType_Int2); + AddTextureIntrinsics("GetDimensions", HLSLBaseType_Texture3D, HLSLBaseType_Int3); + AddTextureIntrinsics("GetDimensions", HLSLBaseType_Texture2DArray, HLSLBaseType_Int3); + AddTextureIntrinsics("GetDimensions", HLSLBaseType_TextureCube, HLSLBaseType_Int3); + AddTextureIntrinsics("GetDimensions", HLSLBaseType_TextureCubeArray, HLSLBaseType_Int3); + AddTextureIntrinsics("GetDimensions", HLSLBaseType_Texture2DMS, HLSLBaseType_Int2); + + return true; +}; + +// The order in this array must match up with HLSLBinaryOp +const int _binaryOpPriority[] = + { + 2, 1, // &&, || + 8, 8, // +, - + 9, 9, // *, / + 7, 7, // <, >, + 7, 7, // <=, >=, + 6, 6, // ==, != + 5, 3, 4, // &, |, ^ +}; + +BaseTypeDescription baseTypeDescriptions[HLSLBaseType_Count]; + +void RegisterMatrix(HLSLBaseType type, uint32_t typeOffset, NumericType numericType, int binaryOpRank, const char* typeName, uint32_t dim1, uint32_t dim2) +{ + char buf[32]; + snprintf(buf, sizeof(buf), "%s%dx%d", typeName, dim1, dim2); + const char* name = gStringPool.AddString(buf); + + HLSLBaseType baseType = (HLSLBaseType)(type + typeOffset); + + BaseTypeDescription& desc = baseTypeDescriptions[baseType]; + desc.typeName = name; + desc.typeNameMetal = name; + + desc.baseType = baseType; + desc.coreType = CoreType_Matrix; + desc.dimensionType = DimensionType(DimensionType_Matrix2x2 + (dim2 - 2)); + desc.numericType = numericType; + + desc.numDimensions = 2; + desc.numComponents = dim1; + desc.height = dim2; + desc.binaryOpRank = binaryOpRank; +} + +void RegisterVector(HLSLBaseType type, uint32_t typeOffset, NumericType numericType, int binaryOpRank, const char* typeName, uint32_t dim) +{ + char buf[32]; + snprintf(buf, sizeof(buf), "%s%d", typeName, dim); + const char* name = gStringPool.AddString(buf); + + HLSLBaseType baseType = (HLSLBaseType)(type + typeOffset); + + BaseTypeDescription& desc = baseTypeDescriptions[type + typeOffset]; + 
desc.typeName = name; + desc.typeNameMetal = name; + + // 4 types + desc.baseType = baseType; + desc.coreType = CoreType_Vector; + desc.dimensionType = DimensionType(DimensionType_Vector2 + (dim - 2)); + desc.numericType = numericType; + + desc.numDimensions = 1; + desc.numComponents = dim; + desc.height = 1; + desc.binaryOpRank = binaryOpRank; +} + +void RegisterScalar(HLSLBaseType type, uint32_t typeOffset, NumericType numericType, int binaryOpRank, const char* typeName) +{ + const char* name = gStringPool.AddString(typeName); + + HLSLBaseType baseType = (HLSLBaseType)(type + typeOffset); + + BaseTypeDescription& desc = baseTypeDescriptions[baseType]; + desc.typeName = name; + desc.typeNameMetal = name; + + // 4 types + desc.baseType = baseType; + desc.coreType = CoreType_Scalar; + desc.dimensionType = DimensionType_Scalar; + desc.numericType = numericType; + + desc.numDimensions = 0; + desc.numComponents = 1; + desc.height = 1; + desc.binaryOpRank = binaryOpRank; +} + +void RegisterTexture(HLSLBaseType baseType, const char* typeName, const char* typeNameMetal) +{ + BaseTypeDescription& desc = baseTypeDescriptions[baseType]; + desc.baseType = baseType; + desc.typeName = typeName; + desc.typeNameMetal = typeNameMetal; + + desc.coreType = CoreType_Texture; +} + +void RegisterSampler(HLSLBaseType baseType, const char* typeName, const char* typeNameMetal) +{ + BaseTypeDescription& desc = baseTypeDescriptions[baseType]; + desc.baseType = baseType; + desc.typeName = typeName; + desc.typeNameMetal = typeNameMetal; + + desc.coreType = CoreType_Sampler; +} + +void RegisterType(HLSLBaseType baseType, CoreType coreType, const char* typeName) +{ + BaseTypeDescription& desc = baseTypeDescriptions[baseType]; + desc.baseType = baseType; + desc.typeName = typeName; + desc.typeNameMetal = typeName; + + desc.coreType = coreType; +} + +bool InitBaseTypeDescriptions() +{ + { + const uint32_t kNumTypes = 3; + const char* typeNames[kNumTypes] = {"float", "half", "double"}; + const 
HLSLBaseType baseTypes[kNumTypes] = {HLSLBaseType_Float, HLSLBaseType_Half, HLSLBaseType_Double}; + const NumericType numericTypes[kNumTypes] = {NumericType_Float, NumericType_Half, NumericType_Double}; + const int binaryOpRanks[kNumTypes] = {0, 1, 2}; + + for (uint32_t i = 0; i < kNumTypes; ++i) { + const char* typeName = typeNames[i]; + HLSLBaseType baseType = baseTypes[i]; + NumericType numericType = numericTypes[i]; + int binaryOpRank = binaryOpRanks[i]; + + RegisterScalar(baseType, 0, numericType, binaryOpRank, typeName); + RegisterVector(baseType, 1, numericType, binaryOpRank, typeName, 2); + RegisterVector(baseType, 2, numericType, binaryOpRank, typeName, 3); + RegisterVector(baseType, 3, numericType, binaryOpRank, typeName, 4); + + RegisterMatrix(baseType, 4, numericType, binaryOpRank, typeName, 2, 2); + RegisterMatrix(baseType, 5, numericType, binaryOpRank, typeName, 3, 3); + RegisterMatrix(baseType, 6, numericType, binaryOpRank, typeName, 4, 4); + } + } + + { + const uint32_t kNumTypes = 7; + const char* typeNames[kNumTypes] = { + "int", "uint", + "long", "ulong", + "short", "ushort", + "bool"}; + const HLSLBaseType baseTypes[kNumTypes] = { + HLSLBaseType_Int, HLSLBaseType_Uint, + HLSLBaseType_Long, HLSLBaseType_Ulong, + HLSLBaseType_Short, HLSLBaseType_Ushort, + HLSLBaseType_Bool}; + const NumericType numericTypes[kNumTypes] = { + NumericType_Int, NumericType_Uint, + NumericType_Long, NumericType_Ulong, + NumericType_Short, NumericType_Ushort, + NumericType_Bool}; + const int binaryOpRanks[kNumTypes] = { + 2, 1, // Note: int seems like it should be highest + 3, 2, + 4, 3, + 4}; + + for (uint32_t i = 0; i < kNumTypes; ++i) { + const char* typeName = typeNames[i]; + HLSLBaseType baseType = baseTypes[i]; + NumericType numericType = numericTypes[i]; + int binaryOpRank = binaryOpRanks[i]; + + RegisterScalar(baseType, 0, numericType, binaryOpRank, typeName); + RegisterVector(baseType, 1, numericType, binaryOpRank, typeName, 2); + RegisterVector(baseType, 2, 
numericType, binaryOpRank, typeName, 3);
+            RegisterVector(baseType, 3, numericType, binaryOpRank, typeName, 4);
+        }
+    }
+
+    // TODO: add u/char, but HLSL2021 doesn't have support, but MSL does
+
+    // TODO: would it be better to use "texture" base type (see "buffer")
+    // and then have a TextureSubType off that?
+
+    // textures: (base type, HLSL type name, Metal type name)
+    RegisterTexture(HLSLBaseType_Texture2D, "Texture2D", "texture2d");
+    RegisterTexture(HLSLBaseType_Texture2DArray, "Texture2DArray", "texture2d_array");
+    RegisterTexture(HLSLBaseType_Texture3D, "Texture3D", "texture3d");
+    RegisterTexture(HLSLBaseType_TextureCube, "TextureCube", "texturecube");
+    // Metal spells this texturecube_array; the previous "texturecube_rray"
+    // produced a type name that does not exist in MSL.
+    RegisterTexture(HLSLBaseType_TextureCubeArray, "TextureCubeArray", "texturecube_array");
+    RegisterTexture(HLSLBaseType_Texture2DMS, "Texture2DMS", "texture2d_ms");
+
+    RegisterTexture(HLSLBaseType_Depth2D, "Depth2D", "depth2d");
+    RegisterTexture(HLSLBaseType_Depth2DArray, "Depth2DArray", "depth2d_array");
+    RegisterTexture(HLSLBaseType_DepthCube, "DepthCube", "depthcube");
+
+    RegisterTexture(HLSLBaseType_RWTexture2D, "RWTexture2D", "texture2d");
+
+    // samplers: both HLSL sampler kinds are registered with the same Metal name
+    RegisterSampler(HLSLBaseType_SamplerState, "SamplerState", "sampler");
+    RegisterSampler(HLSLBaseType_SamplerComparisonState, "SamplerComparisonState", "sampler");
+
+    // remaining non-numeric types share one name for HLSL and Metal
+    RegisterType(HLSLBaseType_UserDefined, CoreType_Struct, "struct");
+    RegisterType(HLSLBaseType_Void, CoreType_Void, "void");
+    RegisterType(HLSLBaseType_Unknown, CoreType_None, "unknown");
+    RegisterType(HLSLBaseType_Expression, CoreType_Expression, "expression");
+    RegisterType(HLSLBaseType_Comment, CoreType_Comment, "comment");
+    RegisterType(HLSLBaseType_Buffer, CoreType_Buffer, "buffer");
+
+    return true;
+}
+
+static bool _initBaseTypeDescriptions = InitBaseTypeDescriptions();
+
+// this needs to happen after base descriptions
+static bool _initIntrinsics = InitIntrinsics();
+
+HLSLBaseType ArithmeticOpResultType(HLSLBinaryOp binaryOp, HLSLBaseType t1, HLSLBaseType t2)
+{
+    // check that both are same numeric types
+
+ // add, sub, div are similar + // mul is it's own test + + // most mixing of types is invalid here + + if (IsNumericTypeEqual(t1, t2)) { + bool isSameDimensions = IsDimensionEqual(t1, t2); + + if (IsScalarType(t1) && IsScalarType(t2)) { + if (isSameDimensions) return t1; + } + else if (IsVectorType(t1) && IsVectorType(t2)) { + if (isSameDimensions) return t1; + } + else if (IsMatrixType(t1) && IsMatrixType(t2)) { + if (isSameDimensions) return t1; + } + + else if ((binaryOp == HLSLBinaryOp_Add || binaryOp == HLSLBinaryOp_Sub) && + (IsScalarType(t1) || IsScalarType(t2))) { + // allow v + 1, and 1 - v + return (IsVectorType(t1) || IsMatrixType(t1)) ? t1 : t2; + } + + else if ((binaryOp == HLSLBinaryOp_Mul || binaryOp == HLSLBinaryOp_Div) && + (IsScalarType(t1) || IsScalarType(t2))) { + // v * s + return (IsVectorType(t1) || IsMatrixType(t1)) ? t1 : t2; + } + + // this has to check dimension across the mul + else if (binaryOp == HLSLBinaryOp_Mul) { + bool isSameCrossDimension = IsCrossDimensionEqual(t1, t2); + + if (IsMatrixType(t1) && IsVectorType(t2)) { + if (isSameCrossDimension) return t2; + } + else if (IsVectorType(t1) && IsMatrixType(t2)) { + if (isSameCrossDimension) return t1; + } + } + } + + return HLSLBaseType_Unknown; +} + +// Priority of the ? : operator. 
+const int _conditionalOpPriority = 1; + +const char* GetTypeNameHLSL(const HLSLType& type) +{ + if (type.baseType == HLSLBaseType_UserDefined) { + return type.typeName; + } + else { + return baseTypeDescriptions[type.baseType].typeName; + } +} + +const char* GetTypeNameMetal(const HLSLType& type) +{ + if (type.baseType == HLSLBaseType_UserDefined) { + return type.typeName; + } + else { + return baseTypeDescriptions[type.baseType].typeNameMetal; + } +} + +static const char* GetBinaryOpName(HLSLBinaryOp binaryOp) +{ + switch (binaryOp) { + case HLSLBinaryOp_And: + return "&&"; + case HLSLBinaryOp_Or: + return "||"; + + case HLSLBinaryOp_Add: + return "+"; + case HLSLBinaryOp_Sub: + return "-"; + case HLSLBinaryOp_Mul: + return "*"; + case HLSLBinaryOp_Div: + return "/"; + + case HLSLBinaryOp_Less: + return "<"; + case HLSLBinaryOp_Greater: + return ">"; + case HLSLBinaryOp_LessEqual: + return "<="; + case HLSLBinaryOp_GreaterEqual: + return ">="; + case HLSLBinaryOp_Equal: + return "=="; + case HLSLBinaryOp_NotEqual: + return "!="; + + case HLSLBinaryOp_BitAnd: + return "&"; + case HLSLBinaryOp_BitOr: + return "|"; + case HLSLBinaryOp_BitXor: + return "^"; + + case HLSLBinaryOp_Assign: + return "="; + case HLSLBinaryOp_AddAssign: + return "+="; + case HLSLBinaryOp_SubAssign: + return "-="; + case HLSLBinaryOp_MulAssign: + return "*="; + case HLSLBinaryOp_DivAssign: + return "/="; + default: + ASSERT(false); + return "???"; + } +} + +/* + * 1.) Match + * 2.) Scalar dimension promotion (scalar -> vector/matrix) + * 3.) Conversion + * 4.) Conversion + scalar dimension promotion + * 5.) Truncation (vector -> scalar or lower component vector, matrix -> scalar or lower component matrix) + * 6.) 
Conversion + truncation + */ +static int GetTypeCastRank(HLSLTree* tree, const HLSLType& srcType, const HLSLType& dstType) +{ + /*if (srcType.array != dstType.array || srcType.arraySize != dstType.arraySize) + { + return -1; + }*/ + + if (srcType.array != dstType.array) { + return -1; + } + + if (srcType.array == true) { + ASSERT(dstType.array == true); + int srcArraySize = -1; + int dstArraySize = -1; + + tree->GetExpressionValue(srcType.arraySize, srcArraySize); + tree->GetExpressionValue(dstType.arraySize, dstArraySize); + + if (srcArraySize != dstArraySize) { + return -1; + } + } + + if (srcType.baseType == HLSLBaseType_UserDefined && dstType.baseType == HLSLBaseType_UserDefined) { + return String_Equal(srcType.typeName, dstType.typeName) ? 0 : -1; + } + + if (srcType.baseType == dstType.baseType) { + // This only works if textures are half or float, but not hwne + // there are more varied texture that can be cast. + if (IsTextureType(srcType.baseType)) { + return srcType.formatType == dstType.formatType ? 
0 : -1; + } + + return 0; + } + + const BaseTypeDescription& srcDesc = baseTypeDescriptions[srcType.baseType]; + const BaseTypeDescription& dstDesc = baseTypeDescriptions[dstType.baseType]; + if (srcDesc.numericType == NumericType_NaN || dstDesc.numericType == NumericType_NaN) { + return -1; + } + + // Result bits: T R R R P (T = truncation, R = conversion rank, P = dimension promotion) + int result = _numberTypeRank[srcDesc.numericType][dstDesc.numericType] << 1; + + if (srcDesc.numDimensions == 0 && dstDesc.numDimensions > 0) { + // Scalar dimension promotion + result |= (1 << 0); + } + else if ((srcDesc.numDimensions == dstDesc.numDimensions && (srcDesc.numComponents > dstDesc.numComponents || srcDesc.height > dstDesc.height)) || + (srcDesc.numDimensions > 0 && dstDesc.numDimensions == 0)) { + // Truncation + result |= (1 << 4); + } + else if (srcDesc.numDimensions != dstDesc.numDimensions || + srcDesc.numComponents != dstDesc.numComponents || + srcDesc.height != dstDesc.height) { + // Can't convert + return -1; + } + + return result; +} + +static bool GetFunctionCallCastRanks(HLSLTree* tree, const HLSLFunctionCall* call, const HLSLFunction* function, int* rankBuffer) +{ + if (function == NULL || function->numArguments < call->numArguments) { + // Function not viable + return false; + } + + const HLSLExpression* expression = call->argument; + const HLSLArgument* argument = function->argument; + + for (int i = 0; i < call->numArguments; ++i) { + int rank = GetTypeCastRank(tree, expression->expressionType, argument->type); + if (rank == -1) { + return false; + } + + rankBuffer[i] = rank; + + argument = argument->nextArgument; + expression = expression->nextExpression; + } + + for (int i = call->numArguments; i < function->numArguments; ++i) { + if (argument->defaultValue == NULL) { + // Function not viable. 
+            return false;
+        }
+
+        // Step to the next declared parameter. Without this the loop tests
+        // the same parameter on every iteration, so a later parameter that
+        // has no default value would go unnoticed.
+        argument = argument->nextArgument;
+    }
+
+    return true;
+}
+
+// Orders cast ranks from highest (worst) to lowest (best).
+struct CompareRanks {
+    bool operator()(const int& rank1, const int& rank2) { return rank1 > rank2; }
+};
+
+// Decides which of two candidate overloads fits the call better.
+// A viable function beats a non-viable one. When both are viable, their
+// per-argument cast ranks are sorted worst-first and compared pairwise;
+// the first position with a lower rank wins.
+static CompareFunctionsResult CompareFunctions(HLSLTree* tree, const HLSLFunctionCall* call, const HLSLFunction* function1, const HLSLFunction* function2)
+{
+    // scratch rank buffers, one slot per call argument
+    int* function1Ranks = static_cast<int*>(alloca(sizeof(int) * call->numArguments));
+    int* function2Ranks = static_cast<int*>(alloca(sizeof(int) * call->numArguments));
+
+    const bool function1Viable = GetFunctionCallCastRanks(tree, call, function1, function1Ranks);
+    const bool function2Viable = GetFunctionCallCastRanks(tree, call, function2, function2Ranks);
+
+    // Both functions have to be viable to be able to compare them
+    if (!(function1Viable && function2Viable)) {
+        if (function1Viable) {
+            return Function1Better;
+        }
+        else if (function2Viable) {
+            return Function2Better;
+        }
+        else {
+            return FunctionsEqual;
+        }
+    }
+
+    std::sort(function1Ranks, function1Ranks + call->numArguments, CompareRanks());
+    std::sort(function2Ranks, function2Ranks + call->numArguments, CompareRanks());
+
+    for (int i = 0; i < call->numArguments; ++i) {
+        if (function1Ranks[i] < function2Ranks[i]) {
+            return Function1Better;
+        }
+        else if (function2Ranks[i] < function1Ranks[i]) {
+            return Function2Better;
+        }
+    }
+
+    return FunctionsEqual;
+}
+
+static bool GetBinaryOpResultType(HLSLBinaryOp binaryOp, const HLSLType& type1, const HLSLType& type2, HLSLType& result)
+{
+    // only allow numeric types for binary operators
+    if (!IsNumericType(type1.baseType) || type1.array ||
+        !IsNumericType(type2.baseType) || type2.array) {
+        return false;
+    }
+
+    if (IsBitOp(binaryOp)) {
+        if (!IsIntegerType(type1.baseType)) {
+            return false;
+        }
+    }
+
+    if (IsLogicOp(binaryOp) || IsCompareOp(binaryOp)) {
+        int numComponents = std::max(baseTypeDescriptions[type1.baseType].numComponents, baseTypeDescriptions[type2.baseType].numComponents);
+        result.baseType = HLSLBaseType(HLSLBaseType_Bool +
numComponents - 1); + } + else { + // TODO: allso mulAssign, ... + assert(!IsAssignOp(binaryOp)); + + result.baseType = ArithmeticOpResultType(binaryOp, type1.baseType, type2.baseType); + } + + result.typeName = NULL; + result.array = false; + result.arraySize = NULL; + result.flags = (type1.flags & type2.flags) & HLSLTypeFlag_Const; // Propagate constness. + + return result.baseType != HLSLBaseType_Unknown; +} + +HLSLParser::HLSLParser(Allocator* allocator, const char* fileName, const char* buffer, size_t length) : m_tokenizer(fileName, buffer, length), + m_userTypes(allocator), + m_variables(allocator), + m_functions(allocator) +{ + m_numGlobals = 0; + m_tree = NULL; +} + +bool HLSLParser::Accept(int token) +{ + if (m_tokenizer.GetToken() == token) { + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::Accept(const char* token) +{ + if (m_tokenizer.GetToken() == HLSLToken_Identifier && String_Equal(token, m_tokenizer.GetIdentifier())) { + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::Expect(int token) +{ + if (!Accept(token)) { + char want[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(token, want); + char near[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(near); + m_tokenizer.Error("Syntax error: expected '%s' near '%s'", want, near); + return false; + } + return true; +} + +bool HLSLParser::Expect(const char* token) +{ + if (!Accept(token)) { + const char* want = token; + char near[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(near); + m_tokenizer.Error("Syntax error: expected '%s' near '%s'", want, near); + return false; + } + return true; +} + +bool HLSLParser::AcceptIdentifier(const char*& identifier) +{ + if (m_tokenizer.GetToken() == HLSLToken_Identifier) { + identifier = m_tree->AddString(m_tokenizer.GetIdentifier()); + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::ExpectIdentifier(const char*& identifier) +{ + if 
(!AcceptIdentifier(identifier)) { + char near[HLSLTokenizer::s_maxIdentifier] = {}; + m_tokenizer.GetTokenName(near); + m_tokenizer.Error("Syntax error: expected identifier near '%s'", near); + identifier = ""; + return false; + } + return true; +} + +bool HLSLParser::AcceptFloat(float& value) +{ + if (m_tokenizer.GetToken() == HLSLToken_FloatLiteral) { + value = m_tokenizer.GetFloat(); + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::AcceptHalf(float& value) +{ + if (m_tokenizer.GetToken() == HLSLToken_HalfLiteral) { + value = m_tokenizer.GetFloat(); + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::AcceptInt(int& value) +{ + if (m_tokenizer.GetToken() == HLSLToken_IntLiteral) { + value = m_tokenizer.GetInt(); + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::ParseTopLevel(HLSLStatement*& statement) +{ + HLSLAttribute* attributes = NULL; + ParseAttributeBlock(attributes); + + int line = GetLineNumber(); + const char* fileName = GetFileName(); + + HLSLType type; + //HLSLBaseType type; + //const char* typeName = NULL; + //int typeFlags = false; + + // TODO: this cast likely isn't safe + HLSLToken token = (HLSLToken)m_tokenizer.GetToken(); + + bool doesNotExpectSemicolon = false; + + // Alec add comment + if (ParseComment(statement)) { + doesNotExpectSemicolon = true; + } + else if (Accept(HLSLToken_Struct)) { + // Struct declaration. + + const char* structName = NULL; + if (!ExpectIdentifier(structName)) { + return false; + } + if (FindUserDefinedType(structName) != NULL) { + m_tokenizer.Error("struct %s already defined", structName); + return false; + } + + if (!Expect('{')) { + return false; + } + + HLSLStruct* structure = m_tree->AddNode(fileName, line); + structure->name = structName; + + m_userTypes.PushBack(structure); + + HLSLStructField* lastField = NULL; + + // Add the struct to our list of user defined types. 
+        while (!Accept('}')) {
+            if (CheckForUnexpectedEndOfStream('}')) {
+                return false;
+            }
+
+            // chain fields onto struct
+            HLSLStructField* field = NULL;
+            if (!ParseFieldDeclaration(field)) {
+                return false;
+            }
+            ASSERT(field != NULL);
+            if (lastField == NULL) {
+                structure->field = field;
+            }
+            else {
+                lastField->nextField = field;
+            }
+            lastField = field;
+        }
+
+        statement = structure;
+    }
+    else if (Accept(HLSLToken_ConstantBuffer) ||
+             Accept(HLSLToken_StructuredBuffer) ||
+             Accept(HLSLToken_RWStructuredBuffer) ||
+             Accept(HLSLToken_ByteAddressBuffer) ||
+             Accept(HLSLToken_RWByteAddressBuffer)) {
+        HLSLBuffer* buffer = m_tree->AddNode<HLSLBuffer>(fileName, line);
+
+        // these can appear on t or u slots for read vs. read/write
+        // need to track what the user specified. Load vs. Store calls.
+        buffer->bufferType = ConvertTokenToBufferType(token);
+
+        // The struct type argument is mandatory on this path because
+        // bufferStruct is dereferenced further down. Expect() has already
+        // reported the syntax error, so stop here rather than carrying on
+        // with no struct assigned.
+        if (!Expect('<')) {
+            return false;
+        }
+
+        const char* structName = nullptr;
+
+        // Read the templated type, should reference a struct
+        // don't need to support fields on this.
+        if (!ExpectIdentifier(structName) || !Expect('>')) {
+            return false;
+        }
+
+        buffer->bufferStruct = const_cast<HLSLStruct*>(FindUserDefinedType(structName));
+        if (!buffer->bufferStruct) {
+            // report the failure instead of returning false silently
+            m_tokenizer.Error("Undefined struct '%s' used as buffer type", structName);
+            return false;
+        }
+
+        // get name of buffer
+        AcceptIdentifier(buffer->name);
+
+        // Parse ": register(t0/u0)"
+        if (Accept(':')) {
+            if (!Expect(HLSLToken_Register) || !Expect('(') || !ExpectIdentifier(buffer->registerName) || !Expect(')')) {
+                return false;
+            }
+            // TODO: Check that we aren't re-using a register.
+        }
+
+        // Buffer needs to show up to reference the fields
+        // of the struct of the templated type.
+        HLSLType bufferType(HLSLBaseType_UserDefined);
+        bufferType.typeName = buffer->bufferStruct->name; // this is for userDefined name (f.e. struct)
+
+        DeclareVariable(buffer->name, bufferType);
+
+        // TODO: add fields as variables too?
+ + statement = buffer; + } + else if (Accept(HLSLToken_CBuffer) || Accept(HLSLToken_TBuffer)) { + // cbuffer/tbuffer declaration. + + HLSLBuffer* buffer = m_tree->AddNode(fileName, line); + AcceptIdentifier(buffer->name); + + buffer->bufferType = ConvertTokenToBufferType(token); + + // Optional register assignment. + if (Accept(':')) { + if (!Expect(HLSLToken_Register) || !Expect('(') || !ExpectIdentifier(buffer->registerName) || !Expect(')')) { + return false; + } + // TODO: Check that we aren't re-using a register. + } + + // Fields are defined inside the c/tbuffer. + // These represent globals to the rest of the codebase which + // is simply evil. + + if (!Expect('{')) { + return false; + } + HLSLDeclaration* lastField = NULL; + while (!Accept('}')) { + if (CheckForUnexpectedEndOfStream('}')) { + return false; + } + + // TODO: can't convert statement to fields + if (ParseComment(statement)) { + continue; + } + + HLSLDeclaration* field = NULL; + if (!ParseDeclaration(field)) { + m_tokenizer.Error("Expected variable declaration"); + return false; + } + + // These show up as global variables of the fields + DeclareVariable(field->name, field->type); + + // chain fields onto buffer + field->buffer = buffer; + if (buffer->field == NULL) { + buffer->field = field; + } + else { + lastField->nextStatement = field; + } + lastField = field; + + if (!Expect(';')) { + return false; + } + } + + statement = buffer; + } + else if (AcceptType(true, type)) { + // Global declaration (uniform or function). + const char* globalName = NULL; + if (!ExpectIdentifier(globalName)) { + return false; + } + + if (Accept('(')) { + // Function declaration. 
+ + HLSLFunction* function = m_tree->AddNode(fileName, line); + function->name = globalName; + function->returnType.baseType = type.baseType; + function->returnType.typeName = type.typeName; + function->attributes = attributes; + + BeginScope(); + + if (!ParseArgumentList(function->argument, function->numArguments, function->numOutputArguments)) { + return false; + } + + const HLSLFunction* declaration = FindFunction(function); + + // Forward declaration + if (Accept(';')) { + // Add a function entry so that calls can refer to it + if (!declaration) { + m_functions.PushBack(function); + statement = function; + } + EndScope(); + return true; + } + + // Optional semantic. + if (Accept(':') && !ExpectIdentifier(function->semantic)) { + return false; + } + + if (declaration) { + if (declaration->forward || declaration->statement) { + m_tokenizer.Error("Duplicate function definition"); + return false; + } + + const_cast(declaration)->forward = function; + } + else { + m_functions.PushBack(function); + } + + if (!Expect('{') || !ParseBlock(function->statement, function->returnType)) { + return false; + } + + EndScope(); + + // Note, no semi-colon at the end of a function declaration. + statement = function; + + return true; + } + else { + // Uniform declaration. + HLSLDeclaration* declaration = m_tree->AddNode(fileName, line); + declaration->name = globalName; + declaration->type = type; + + // Handle array syntax. + if (Accept('[')) { + if (!Accept(']')) { + if (!ParseExpression(declaration->type.arraySize) || !Expect(']')) { + return false; + } + } + declaration->type.array = true; + } + + // Handle optional register. + if (Accept(':')) { + // @@ Currently we support either a semantic or a register, but not both. 
+ if (AcceptIdentifier(declaration->semantic)) { + // int k = 1; + } + else if (!Expect(HLSLToken_Register) || !Expect('(') || !ExpectIdentifier(declaration->registerName) || !Expect(')')) { + return false; + } + } + + DeclareVariable(globalName, declaration->type); + + if (!ParseDeclarationAssignment(declaration)) { + return false; + } + + // TODO: Multiple variables declared on one line. + + statement = declaration; + } + } + + /* + // These three are from .fx file syntax + else if (ParseTechnique(statement)) { + doesNotExpectSemicolon = true; + } + else if (ParsePipeline(statement)) { + doesNotExpectSemicolon = true; + } + else if (ParseStage(statement)) { + doesNotExpectSemicolon = true; + } + */ + + if (statement != NULL) { + statement->attributes = attributes; + } + + return doesNotExpectSemicolon || Expect(';'); +} + +bool HLSLParser::ParseStatementOrBlock(HLSLStatement*& firstStatement, const HLSLType& returnType, bool scoped /*=true*/) +{ + if (scoped) { + BeginScope(); + } + if (Accept('{')) { + if (!ParseBlock(firstStatement, returnType)) { + return false; + } + } + else { + if (!ParseStatement(firstStatement, returnType)) { + return false; + } + } + if (scoped) { + EndScope(); + } + return true; +} + +bool HLSLParser::ParseComment(HLSLStatement*& statement) +{ + if (m_tokenizer.GetToken() != HLSLToken_Comment) + return false; + + const char* textName = m_tree->AddString(m_tokenizer.GetComment()); + + // This has already parsed the next comment before have had a chance to + // grab the string from the previous comment, if they were sequenential comments. + // So grabbing a copy of comment before this parses the next comment. 
+ if (!Accept(HLSLToken_Comment)) + return false; + + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLComment* comment = m_tree->AddNode(fileName, line); + comment->text = textName; + + // pass it back + statement = comment; + return true; +} + +bool HLSLParser::ParseBlock(HLSLStatement*& firstStatement, const HLSLType& returnType) +{ + HLSLStatement* lastStatement = NULL; + while (!Accept('}')) { + if (CheckForUnexpectedEndOfStream('}')) { + return false; + } + + HLSLStatement* statement = NULL; + + if (!ParseStatement(statement, returnType)) { + return false; + } + + // chain statements onto the list + if (statement != NULL) { + if (firstStatement == NULL) { + firstStatement = statement; + } + else { + lastStatement->nextStatement = statement; + } + lastStatement = statement; + + // some statement parsing can gen more than one statement, so find end + while (lastStatement->nextStatement) + lastStatement = lastStatement->nextStatement; + } + } + return true; +} + +bool HLSLParser::ParseStatement(HLSLStatement*& statement, const HLSLType& returnType) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + // Empty statements. + if (Accept(';')) { + return true; + } + + HLSLAttribute* attributes = NULL; + ParseAttributeBlock(attributes); // @@ Leak if not assigned to node? + +#if 0 + // @@ Work in progress. + // Alec? - @If, @Else blocks, are these like specialization constants? +/* + // Static statements: @if only for now. + if (Accept('@')) + { + if (Accept(HLSLToken_If)) + { + //HLSLIfStatement* ifStatement = m_tree->AddNode(fileName, line); + //ifStatement->isStatic = true; + //ifStatement->attributes = attributes; + + HLSLExpression * condition = NULL; + + m_allowUndeclaredIdentifiers = true; // Not really correct... better to push to stack? 
+ if (!Expect('(') || !ParseExpression(condition) || !Expect(')')) + { + m_allowUndeclaredIdentifiers = false; + return false; + } + m_allowUndeclaredIdentifiers = false; + + if ((condition->expressionType.flags & HLSLTypeFlag_Const) == 0) + { + m_tokenizer.Error("Syntax error: @if condition is not constant"); + return false; + } + + int conditionValue; + if (!m_tree->GetExpressionValue(condition, conditionValue)) + { + m_tokenizer.Error("Syntax error: Cannot evaluate @if condition"); + return false; + } + + if (!conditionValue) m_disableSemanticValidation = true; + + HLSLStatement * ifStatements = NULL; + HLSLStatement * elseStatements = NULL; + + if (!ParseStatementOrBlock(ifStatements, returnType, false)) + { + m_disableSemanticValidation = false; + return false; + } + if (Accept(HLSLToken_Else)) + { + if (conditionValue) m_disableSemanticValidation = true; + + if (!ParseStatementOrBlock(elseStatements, returnType, false)) + { + m_disableSemanticValidation = false; + return false; + } + } + m_disableSemanticValidation = false; + + if (conditionValue) statement = ifStatements; + else statement = elseStatements; + + // @@ Free the pruned statements? + + return true; + } + else { + m_tokenizer.Error("Syntax error: unexpected token '@'"); + } + } +*/ +#endif + + if (ParseComment(statement)) { + return true; + } + + // If statement. + if (Accept(HLSLToken_If)) { + HLSLIfStatement* ifStatement = m_tree->AddNode(fileName, line); + ifStatement->attributes = attributes; + if (!Expect('(') || !ParseExpression(ifStatement->condition) || !Expect(')')) { + return false; + } + statement = ifStatement; + if (!ParseStatementOrBlock(ifStatement->statement, returnType)) { + return false; + } + if (Accept(HLSLToken_Else)) { + return ParseStatementOrBlock(ifStatement->elseStatement, returnType); + } + return true; + } + + // For statement. 
+ if (Accept(HLSLToken_For)) { + HLSLForStatement* forStatement = m_tree->AddNode(fileName, line); + forStatement->attributes = attributes; + if (!Expect('(')) { + return false; + } + BeginScope(); + if (!ParseDeclaration(forStatement->initialization)) { + return false; + } + if (!Expect(';')) { + return false; + } + ParseExpression(forStatement->condition); + if (!Expect(';')) { + return false; + } + ParseExpression(forStatement->increment); + if (!Expect(')')) { + return false; + } + statement = forStatement; + if (!ParseStatementOrBlock(forStatement->statement, returnType)) { + return false; + } + EndScope(); + return true; + } + + if (attributes != NULL) { + // @@ Error. Unexpected attribute. We only support attributes associated to if and for statements. + } + + // Block statement. + if (Accept('{')) { + HLSLBlockStatement* blockStatement = m_tree->AddNode(fileName, line); + statement = blockStatement; + BeginScope(); + bool success = ParseBlock(blockStatement->statement, returnType); + EndScope(); + return success; + } + + // Discard statement. + if (Accept(HLSLToken_Discard)) { + HLSLDiscardStatement* discardStatement = m_tree->AddNode(fileName, line); + statement = discardStatement; + return Expect(';'); + } + + // Break statement. + if (Accept(HLSLToken_Break)) { + HLSLBreakStatement* breakStatement = m_tree->AddNode(fileName, line); + statement = breakStatement; + return Expect(';'); + } + + // Continue statement. + if (Accept(HLSLToken_Continue)) { + HLSLContinueStatement* continueStatement = m_tree->AddNode(fileName, line); + statement = continueStatement; + return Expect(';'); + } + + // Return statement + if (Accept(HLSLToken_Return)) { + HLSLReturnStatement* returnStatement = m_tree->AddNode(fileName, line); + if (!Accept(';') && !ParseExpression(returnStatement->expression)) { + return false; + } + // Check that the return expression can be cast to the return type of the function. 
+ HLSLType voidType(HLSLBaseType_Void); + if (!CheckTypeCast(returnStatement->expression ? returnStatement->expression->expressionType : voidType, returnType)) { + return false; + } + + statement = returnStatement; + return Expect(';'); + } + + HLSLDeclaration* declaration = NULL; + HLSLExpression* expression = NULL; + + if (ParseDeclaration(declaration)) { + statement = declaration; + } + else if (ParseExpression(expression)) { + HLSLExpressionStatement* expressionStatement; + expressionStatement = m_tree->AddNode(fileName, line); + expressionStatement->expression = expression; + statement = expressionStatement; + } + + return Expect(';'); +} + +// IC: This is only used in block statements, or within control flow statements. So, it doesn't support semantics or layout modifiers. +// @@ We should add suport for semantics for inline input/output declarations. +bool HLSLParser::ParseDeclaration(HLSLDeclaration*& declaration) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLType type; + if (!AcceptType(/*allowVoid=*/false, type)) { + return false; + } + + bool allowUnsizedArray = true; // This is needed for SSBO + + HLSLDeclaration* firstDeclaration = NULL; + HLSLDeclaration* lastDeclaration = NULL; + + do { + const char* name; + if (!ExpectIdentifier(name)) { + // TODO: false means we didn't accept a declaration and we had an error! + return false; + } + // Handle array syntax. + if (Accept('[')) { + type.array = true; + // Optionally allow no size to the specified for the array. + if (Accept(']') && allowUnsizedArray) { + return true; + } + if (!ParseExpression(type.arraySize) || !Expect(']')) { + return false; + } + } + + HLSLDeclaration* parsedDeclaration = m_tree->AddNode(fileName, line); + parsedDeclaration->type = type; + parsedDeclaration->name = name; + + DeclareVariable(parsedDeclaration->name, parsedDeclaration->type); + + // Handle option assignment of the declared variables(s). 
+ if (!ParseDeclarationAssignment(parsedDeclaration)) { + return false; + } + + if (firstDeclaration == NULL) firstDeclaration = parsedDeclaration; + if (lastDeclaration != NULL) lastDeclaration->nextDeclaration = parsedDeclaration; + lastDeclaration = parsedDeclaration; + + } while (Accept(',')); + + declaration = firstDeclaration; + + return true; +} + +bool HLSLParser::ParseDeclarationAssignment(HLSLDeclaration* declaration) +{ + if (Accept('=')) { + // Handle array initialization syntax. + if (declaration->type.array) { + int numValues = 0; + if (!Expect('{') || !ParseExpressionList('}', true, declaration->assignment, numValues)) { + return false; + } + } + // else if (IsSamplerType(declaration->type.baseType)) // TODO: should be for SamplerStateBlock, not Sampler + // { + // if (!ParseSamplerState(declaration->assignment)) + // { + // return false; + // } + // } + else if (!ParseExpression(declaration->assignment)) { + return false; + } + } + return true; +} + +bool HLSLParser::ParseFieldDeclaration(HLSLStructField*& field) +{ + field = m_tree->AddNode(GetFileName(), GetLineNumber()); + if (!ExpectDeclaration(false, field->type, field->name)) { + return false; + } + // Handle optional semantics. + if (Accept(':')) { + if (!ExpectIdentifier(field->semantic)) { + return false; + } + } + return Expect(';'); +} + +// @@ Add support for packoffset to general declarations. +/*bool HLSLParser::ParseBufferFieldDeclaration(HLSLBufferField*& field) +{ + field = m_tree->AddNode( GetFileName(), GetLineNumber() ); + if (AcceptDeclaration(false, field->type, field->name)) + { + // Handle optional packoffset. 
+ if (Accept(':')) + { + if (!Expect("packoffset")) + { + return false; + } + const char* constantName = NULL; + const char* swizzleMask = NULL; + if (!Expect('(') || !ExpectIdentifier(constantName) || !Expect('.') || !ExpectIdentifier(swizzleMask) || !Expect(')')) + { + return false; + } + } + return Expect(';'); + } + return false; +}*/ + +bool HLSLParser::CheckTypeCast(const HLSLType& srcType, const HLSLType& dstType) +{ + if (GetTypeCastRank(m_tree, srcType, dstType) == -1) { + const char* srcTypeName = GetTypeNameHLSL(srcType); + const char* dstTypeName = GetTypeNameHLSL(dstType); + m_tokenizer.Error("Cannot implicitly convert from '%s' to '%s'", srcTypeName, dstTypeName); + return false; + } + return true; +} + +bool HLSLParser::ParseExpression(HLSLExpression*& expression) +{ + if (!ParseBinaryExpression(0, expression)) { + return false; + } + + HLSLBinaryOp assignOp; + if (AcceptAssign(assignOp)) { + HLSLExpression* expression2 = NULL; + if (!ParseExpression(expression2)) { + return false; + } + HLSLBinaryExpression* binaryExpression = m_tree->AddNode(expression->fileName, expression->line); + binaryExpression->binaryOp = assignOp; + binaryExpression->expression1 = expression; + binaryExpression->expression2 = expression2; + // This type is not strictly correct, since the type should be a reference. + // However, for our usage of the types it should be sufficient. 
+ binaryExpression->expressionType = expression->expressionType; + + if (!CheckTypeCast(expression2->expressionType, expression->expressionType)) { + const char* srcTypeName = GetTypeNameHLSL(expression2->expressionType); + const char* dstTypeName = GetTypeNameHLSL(expression->expressionType); + m_tokenizer.Error("Cannot implicitly convert from '%s' to '%s'", srcTypeName, dstTypeName); + return false; + } + + expression = binaryExpression; + } + + return true; +} + +bool HLSLParser::AcceptBinaryOperator(int priority, HLSLBinaryOp& binaryOp) +{ + int token = m_tokenizer.GetToken(); + switch (token) { + case HLSLToken_LogicalAnd: + binaryOp = HLSLBinaryOp_And; + break; + case HLSLToken_LogicalOr: + binaryOp = HLSLBinaryOp_Or; + break; + case '+': + binaryOp = HLSLBinaryOp_Add; + break; + case '-': + binaryOp = HLSLBinaryOp_Sub; + break; + case '*': + binaryOp = HLSLBinaryOp_Mul; + break; + case '/': + binaryOp = HLSLBinaryOp_Div; + break; + case '<': + binaryOp = HLSLBinaryOp_Less; + break; + case '>': + binaryOp = HLSLBinaryOp_Greater; + break; + case HLSLToken_LessEqual: + binaryOp = HLSLBinaryOp_LessEqual; + break; + case HLSLToken_GreaterEqual: + binaryOp = HLSLBinaryOp_GreaterEqual; + break; + case HLSLToken_EqualEqual: + binaryOp = HLSLBinaryOp_Equal; + break; + case HLSLToken_NotEqual: + binaryOp = HLSLBinaryOp_NotEqual; + break; + case '&': + binaryOp = HLSLBinaryOp_BitAnd; + break; + case '|': + binaryOp = HLSLBinaryOp_BitOr; + break; + case '^': + binaryOp = HLSLBinaryOp_BitXor; + break; + default: + return false; + } + if (_binaryOpPriority[binaryOp] > priority) { + m_tokenizer.Next(); + return true; + } + return false; +} + +bool HLSLParser::AcceptUnaryOperator(bool pre, HLSLUnaryOp& unaryOp) +{ + int token = m_tokenizer.GetToken(); + if (token == HLSLToken_PlusPlus) { + unaryOp = pre ? HLSLUnaryOp_PreIncrement : HLSLUnaryOp_PostIncrement; + } + else if (token == HLSLToken_MinusMinus) { + unaryOp = pre ? 
HLSLUnaryOp_PreDecrement : HLSLUnaryOp_PostDecrement; + } + else if (pre && token == '-') { + unaryOp = HLSLUnaryOp_Negative; + } + else if (pre && token == '+') { + unaryOp = HLSLUnaryOp_Positive; + } + else if (pre && token == '!') { + unaryOp = HLSLUnaryOp_Not; + } + else if (pre && token == '~') { + unaryOp = HLSLUnaryOp_Not; + } + else { + return false; + } + m_tokenizer.Next(); + return true; +} + +bool HLSLParser::AcceptAssign(HLSLBinaryOp& binaryOp) +{ + if (Accept('=')) { + binaryOp = HLSLBinaryOp_Assign; + } + else if (Accept(HLSLToken_PlusEqual)) { + binaryOp = HLSLBinaryOp_AddAssign; + } + else if (Accept(HLSLToken_MinusEqual)) { + binaryOp = HLSLBinaryOp_SubAssign; + } + else if (Accept(HLSLToken_TimesEqual)) { + binaryOp = HLSLBinaryOp_MulAssign; + } + else if (Accept(HLSLToken_DivideEqual)) { + binaryOp = HLSLBinaryOp_DivAssign; + } + else { + return false; + } + return true; +} + +bool HLSLParser::ParseBinaryExpression(int priority, HLSLExpression*& expression) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + bool needsEndParen; + + if (!ParseTerminalExpression(expression, needsEndParen)) { + return false; + } + + // reset priority cause openned parenthesis + if (needsEndParen) + priority = 0; + + while (1) { + HLSLBinaryOp binaryOp; + if (AcceptBinaryOperator(priority, binaryOp)) { + HLSLExpression* expression2 = NULL; + ASSERT(binaryOp < sizeof(_binaryOpPriority) / sizeof(int)); + if (!ParseBinaryExpression(_binaryOpPriority[binaryOp], expression2)) { + return false; + } + HLSLBinaryExpression* binaryExpression = m_tree->AddNode(fileName, line); + binaryExpression->binaryOp = binaryOp; + binaryExpression->expression1 = expression; + binaryExpression->expression2 = expression2; + if (!GetBinaryOpResultType(binaryOp, expression->expressionType, expression2->expressionType, binaryExpression->expressionType)) { + const char* typeName1 = GetTypeNameHLSL(binaryExpression->expression1->expressionType); + const char* typeName2 
= GetTypeNameHLSL(binaryExpression->expression2->expressionType); + m_tokenizer.Error("binary '%s' : no global operator found which takes types '%s' and '%s' (or there is no acceptable conversion)", + GetBinaryOpName(binaryOp), typeName1, typeName2); + + return false; + } + + // Propagate constness. + binaryExpression->expressionType.flags = (expression->expressionType.flags | expression2->expressionType.flags) & HLSLTypeFlag_Const; + + expression = binaryExpression; + } + else if (_conditionalOpPriority > priority && Accept('?')) { + HLSLConditionalExpression* conditionalExpression = m_tree->AddNode(fileName, line); + conditionalExpression->condition = expression; + + HLSLExpression* expression1 = NULL; + HLSLExpression* expression2 = NULL; + if (!ParseBinaryExpression(_conditionalOpPriority, expression1) || !Expect(':') || !ParseBinaryExpression(_conditionalOpPriority, expression2)) { + return false; + } + + // Make sure both cases have compatible types. + if (GetTypeCastRank(m_tree, expression1->expressionType, expression2->expressionType) == -1) { + const char* srcTypeName = GetTypeNameHLSL(expression2->expressionType); + const char* dstTypeName = GetTypeNameHLSL(expression1->expressionType); + m_tokenizer.Error("':' no possible conversion from from '%s' to '%s'", srcTypeName, dstTypeName); + return false; + } + + conditionalExpression->trueExpression = expression1; + conditionalExpression->falseExpression = expression2; + conditionalExpression->expressionType = expression1->expressionType; + + expression = conditionalExpression; + } + else { + break; + } + + if (needsEndParen) { + if (!Expect(')')) + return false; + needsEndParen = false; + } + } + + return !needsEndParen || Expect(')'); +} + +bool HLSLParser::ParsePartialConstructor(HLSLExpression*& expression, HLSLBaseType type, const char* typeName) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLConstructorExpression* constructorExpression = m_tree->AddNode(fileName, line); 
+ constructorExpression->type.baseType = type; + constructorExpression->type.typeName = typeName; + int numArguments = 0; + if (!ParseExpressionList(')', false, constructorExpression->argument, numArguments)) { + return false; + } + constructorExpression->expressionType = constructorExpression->type; + constructorExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = constructorExpression; + return true; +} + +bool HLSLParser::ParseTerminalExpression(HLSLExpression*& expression, bool& needsEndParen) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + needsEndParen = false; + + HLSLUnaryOp unaryOp; + if (AcceptUnaryOperator(true, unaryOp)) { + HLSLUnaryExpression* unaryExpression = m_tree->AddNode(fileName, line); + unaryExpression->unaryOp = unaryOp; + if (!ParseTerminalExpression(unaryExpression->expression, needsEndParen)) { + return false; + } + if (unaryOp == HLSLUnaryOp_BitNot) { + if (!IsIntegerType(unaryExpression->expression->expressionType.baseType)) { + const char* typeName = GetTypeNameHLSL(unaryExpression->expression->expressionType); + m_tokenizer.Error("unary '~' : no global operator found which takes type '%s' (or there is no acceptable conversion)", typeName); + return false; + } + } + if (unaryOp == HLSLUnaryOp_Not) { + unaryExpression->expressionType = HLSLType(HLSLBaseType_Bool); + + // Propagate constness. + unaryExpression->expressionType.flags = unaryExpression->expression->expressionType.flags & HLSLTypeFlag_Const; + } + else { + unaryExpression->expressionType = unaryExpression->expression->expressionType; + } + expression = unaryExpression; + return true; + } + + // Expressions inside parenthesis or casts. + if (Accept('(')) { + // Check for a casting operator. + HLSLType type; + if (AcceptType(false, type)) { + // This is actually a type constructor like (float2(... 
+ if (Accept('(')) { + needsEndParen = true; + return ParsePartialConstructor(expression, type.baseType, type.typeName); + } + HLSLCastingExpression* castingExpression = m_tree->AddNode(fileName, line); + castingExpression->type = type; + expression = castingExpression; + castingExpression->expressionType = type; + return Expect(')') && ParseExpression(castingExpression->expression); + } + + if (!ParseExpression(expression) || !Expect(')')) { + return false; + } + } + else { + // Terminal values. + float fValue = 0.0f; + int iValue = 0; + + // literals + if (AcceptFloat(fValue)) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->type = HLSLBaseType_Float; + literalExpression->fValue = fValue; + literalExpression->expressionType.baseType = literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + return true; + } + if (AcceptHalf(fValue)) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->type = HLSLBaseType_Half; + literalExpression->fValue = fValue; + literalExpression->expressionType.baseType = literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + return true; + } + if (AcceptInt(iValue)) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->type = HLSLBaseType_Int; + literalExpression->iValue = iValue; + literalExpression->expressionType.baseType = literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + return true; + } + // TODO: need uint, u/short, double + + // boolean + if (Accept(HLSLToken_True)) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->type = HLSLBaseType_Bool; + literalExpression->bValue = true; + literalExpression->expressionType.baseType = 
literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + return true; + } + if (Accept(HLSLToken_False)) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->type = HLSLBaseType_Bool; + literalExpression->bValue = false; + literalExpression->expressionType.baseType = literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + return true; + } + + // Type constructor. + HLSLType type; + if (AcceptType(/*allowVoid=*/false, type)) { + Expect('('); + if (!ParsePartialConstructor(expression, type.baseType, type.typeName)) { + return false; + } + } + else { + HLSLIdentifierExpression* identifierExpression = m_tree->AddNode(fileName, line); + if (!ExpectIdentifier(identifierExpression->name)) { + return false; + } + + bool undeclaredIdentifier = false; + + const HLSLType* identifierType = FindVariable(identifierExpression->name, identifierExpression->global); + if (identifierType != NULL) { + identifierExpression->expressionType = *identifierType; + } + else { + if (GetIsFunction(identifierExpression->name)) { + // Functions are always global scope. + // TODO: what about member functions? 
+ identifierExpression->global = true; + } + else { + undeclaredIdentifier = true; + } + } + + if (undeclaredIdentifier) { + if (m_allowUndeclaredIdentifiers) { + HLSLLiteralExpression* literalExpression = m_tree->AddNode(fileName, line); + literalExpression->bValue = false; + literalExpression->type = HLSLBaseType_Bool; + literalExpression->expressionType.baseType = literalExpression->type; + literalExpression->expressionType.flags = HLSLTypeFlag_Const; + expression = literalExpression; + } + else { + m_tokenizer.Error("Undeclared identifier '%s'", identifierExpression->name); + return false; + } + } + else { + expression = identifierExpression; + } + } + } + + bool done = false; + while (!done) { + done = true; + + // Post fix unary operator + HLSLUnaryOp unaryOp2; + while (AcceptUnaryOperator(false, unaryOp2)) { + HLSLUnaryExpression* unaryExpression = m_tree->AddNode(fileName, line); + unaryExpression->unaryOp = unaryOp2; + unaryExpression->expression = expression; + unaryExpression->expressionType = unaryExpression->expression->expressionType; + expression = unaryExpression; + done = false; + } + + // Member access operator. + while (Accept('.')) { + // member or member function + const char* text = NULL; + if (!ExpectIdentifier(text)) { + return false; + } + + //const HLSLMemberFuction* memberFunction = FindMemberFunction(text); + //if (function != NULL) + { + // check parent type, and args to see if it's a match + + // copied from intrinsic lookup at end + if (Accept('(')) { + HLSLMemberFunctionCall* functionCall = m_tree->AddNode(fileName, line); + + done = false; + + // parse the args + if (!ParseExpressionList(')', false, functionCall->argument, functionCall->numArguments)) { + return false; + } + + if (expression->nodeType != HLSLNodeType_IdentifierExpression) { + m_tokenizer.Error("Expected function identifier"); + return false; + } + + // This is "tex" of tex.Sample(...) 
+ const HLSLIdentifierExpression* identifierExpression = static_cast(expression); + + // TODO: what if it's a chain of member functions? + functionCall->memberIdentifier = identifierExpression; + + // TODO: lookup texture, buffer, struct for identiferExpression + // TODO: prob need formatType to match half/float return type. + + // TODO: could lookup only float memberFunctions if spirv + // which can't handle fp16 samplers. + + // This is matching to a member function (mostly intrinsics) + const HLSLFunction* function = MatchFunctionCall(functionCall, text, &identifierExpression->expressionType); + if (function == NULL) { + return false; + } + + functionCall->function = function; + functionCall->expressionType = function->returnType; + + // or is it the identiferExpression? + expression = functionCall; + + // for now don't allow chained member functions + return true; + } + } + //else + { + // member variable + HLSLMemberAccess* memberAccess = m_tree->AddNode(fileName, line); + memberAccess->object = expression; + memberAccess->field = text; + + if (!GetMemberType(expression->expressionType, memberAccess)) { + m_tokenizer.Error("Couldn't access '%s'", memberAccess->field); + + // this leaks memberAccess allocated above, but + // all allocated from single allocator, so just free/reset that + return false; + } + expression = memberAccess; + done = false; + } + } + + // Handle array access. 
+ while (Accept('[')) { + HLSLArrayAccess* arrayAccess = m_tree->AddNode(fileName, line); + arrayAccess->array = expression; + if (!ParseExpression(arrayAccess->index) || !Expect(']')) { + return false; + } + + if (expression->expressionType.baseType == HLSLBaseType_UserDefined) { + // some buffer types (!IsGlobalFields) have array notation + arrayAccess->expressionType.baseType = HLSLBaseType_UserDefined; + arrayAccess->expressionType.typeName = expression->expressionType.typeName; + arrayAccess->expressionType.array = true; + arrayAccess->expressionType.arraySize = NULL; + } + else if (expression->expressionType.array) { + arrayAccess->expressionType = expression->expressionType; + arrayAccess->expressionType.array = false; + arrayAccess->expressionType.arraySize = NULL; + } + else { + switch (expression->expressionType.baseType) { + case HLSLBaseType_Float2: + case HLSLBaseType_Float3: + case HLSLBaseType_Float4: + arrayAccess->expressionType.baseType = HLSLBaseType_Float; + break; + case HLSLBaseType_Float2x2: + arrayAccess->expressionType.baseType = HLSLBaseType_Float2; + break; + case HLSLBaseType_Float3x3: + arrayAccess->expressionType.baseType = HLSLBaseType_Float3; + break; + case HLSLBaseType_Float4x4: + arrayAccess->expressionType.baseType = HLSLBaseType_Float4; + break; + + case HLSLBaseType_Half2: + case HLSLBaseType_Half3: + case HLSLBaseType_Half4: + arrayAccess->expressionType.baseType = HLSLBaseType_Half; + break; + case HLSLBaseType_Half2x2: + arrayAccess->expressionType.baseType = HLSLBaseType_Half2; + break; + case HLSLBaseType_Half3x3: + arrayAccess->expressionType.baseType = HLSLBaseType_Half3; + break; + case HLSLBaseType_Half4x4: + arrayAccess->expressionType.baseType = HLSLBaseType_Half4; + break; + + case HLSLBaseType_Double2: + case HLSLBaseType_Double3: + case HLSLBaseType_Double4: + arrayAccess->expressionType.baseType = HLSLBaseType_Double; + break; + case HLSLBaseType_Double2x2: + arrayAccess->expressionType.baseType = 
HLSLBaseType_Double2; + break; + case HLSLBaseType_Double3x3: + arrayAccess->expressionType.baseType = HLSLBaseType_Double3; + break; + case HLSLBaseType_Double4x4: + arrayAccess->expressionType.baseType = HLSLBaseType_Double4; + break; + + case HLSLBaseType_Int2: + case HLSLBaseType_Int3: + case HLSLBaseType_Int4: + arrayAccess->expressionType.baseType = HLSLBaseType_Int; + break; + case HLSLBaseType_Uint2: + case HLSLBaseType_Uint3: + case HLSLBaseType_Uint4: + arrayAccess->expressionType.baseType = HLSLBaseType_Uint; + break; + case HLSLBaseType_Bool2: + case HLSLBaseType_Bool3: + case HLSLBaseType_Bool4: + arrayAccess->expressionType.baseType = HLSLBaseType_Bool; + break; + case HLSLBaseType_Ushort2: + case HLSLBaseType_Ushort3: + case HLSLBaseType_Ushort4: + arrayAccess->expressionType.baseType = HLSLBaseType_Ushort; + break; + case HLSLBaseType_Short2: + case HLSLBaseType_Short3: + case HLSLBaseType_Short4: + arrayAccess->expressionType.baseType = HLSLBaseType_Short; + break; + case HLSLBaseType_Ulong2: + case HLSLBaseType_Ulong3: + case HLSLBaseType_Ulong4: + arrayAccess->expressionType.baseType = HLSLBaseType_Ulong; + break; + case HLSLBaseType_Long2: + case HLSLBaseType_Long3: + case HLSLBaseType_Long4: + arrayAccess->expressionType.baseType = HLSLBaseType_Long; + break; + + // TODO: u/char + default: + m_tokenizer.Error("array, matrix, vector, or indexable object type expected in index expression"); + return false; + } + } + + expression = arrayAccess; + done = false; + } + + // Handle function calls. Note, HLSL functions aren't like C function + // pointers -- we can only directly call on an identifier, not on an + // expression. 
+ if (Accept('(')) { + HLSLFunctionCall* functionCall = m_tree->AddNode(fileName, line); + done = false; + if (!ParseExpressionList(')', false, functionCall->argument, functionCall->numArguments)) { + return false; + } + + if (expression->nodeType != HLSLNodeType_IdentifierExpression) { + m_tokenizer.Error("Expected function identifier"); + return false; + } + + const HLSLIdentifierExpression* identifierExpression = static_cast(expression); + const HLSLFunction* function = MatchFunctionCall(functionCall, identifierExpression->name); + if (function == NULL) { + return false; + } + + functionCall->function = function; + functionCall->expressionType = function->returnType; + expression = functionCall; + } + } + return true; +} + +bool HLSLParser::ParseExpressionList(int endToken, bool allowEmptyEnd, HLSLExpression*& firstExpression, int& numExpressions) +{ + numExpressions = 0; + HLSLExpression* lastExpression = NULL; + while (!Accept(endToken)) { + if (CheckForUnexpectedEndOfStream(endToken)) { + return false; + } + if (numExpressions > 0 && !Expect(',')) { + return false; + } + // It is acceptable for the final element in the initialization list to + // have a trailing comma in some cases, like array initialization such as {1, 2, 3,} + if (allowEmptyEnd && Accept(endToken)) { + break; + } + HLSLExpression* expression = NULL; + if (!ParseExpression(expression)) { + return false; + } + if (firstExpression == NULL) { + firstExpression = expression; + } + else { + lastExpression->nextExpression = expression; + } + lastExpression = expression; + ++numExpressions; + } + return true; +} + +bool HLSLParser::ParseArgumentList(HLSLArgument*& firstArgument, int& numArguments, int& numOutputArguments) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLArgument* lastArgument = NULL; + numArguments = 0; + + while (!Accept(')')) { + if (CheckForUnexpectedEndOfStream(')')) { + return false; + } + if (numArguments > 0 && !Expect(',')) { + return false; 
+ } + + HLSLArgument* argument = m_tree->AddNode(fileName, line); + + // what is unifor modifier ? + if (Accept(HLSLToken_Uniform)) { + argument->modifier = HLSLArgumentModifier_Uniform; + } + + else if (Accept(HLSLToken_In)) { + argument->modifier = HLSLArgumentModifier_In; + } + else if (Accept(HLSLToken_Out)) { + argument->modifier = HLSLArgumentModifier_Out; + } + else if (Accept(HLSLToken_InOut)) { + argument->modifier = HLSLArgumentModifier_Inout; + } + else if (Accept(HLSLToken_Const)) { + argument->modifier = HLSLArgumentModifier_Const; + } + + if (!ExpectDeclaration(/*allowUnsizedArray=*/true, argument->type, argument->name)) { + return false; + } + + DeclareVariable(argument->name, argument->type); + + // Optional semantic. + if (Accept(':') && !ExpectIdentifier(argument->semantic)) { + return false; + } + + if (Accept('=') && !ParseExpression(argument->defaultValue)) { + // @@ Print error! + return false; + } + + if (lastArgument != NULL) { + lastArgument->nextArgument = argument; + } + else { + firstArgument = argument; + } + lastArgument = argument; + + ++numArguments; + if (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout) { + ++numOutputArguments; + } + } + return true; +} + +/* +bool HLSLParser::ParseSamplerState(HLSLExpression*& expression) +{ + if (!Expect(HLSLToken_SamplerState)) + { + return false; + } + + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLSamplerState* samplerState = m_tree->AddNode(fileName, line); + + if (!Expect('{')) + { + return false; + } + + HLSLStateAssignment* lastStateAssignment = NULL; + + // Parse state assignments. 
+ while (!Accept('}')) + { + if (CheckForUnexpectedEndOfStream('}')) + { + return false; + } + + HLSLStateAssignment* stateAssignment = NULL; + if (!ParseStateAssignment(stateAssignment, true, false)) + { + return false; + } + ASSERT(stateAssignment != NULL); + if (lastStateAssignment == NULL) + { + samplerState->stateAssignments = stateAssignment; + } + else + { + lastStateAssignment->nextStateAssignment = stateAssignment; + } + lastStateAssignment = stateAssignment; + samplerState->numStateAssignments++; + } + + expression = samplerState; + return true; +} + +bool HLSLParser::ParseTechnique(HLSLStatement*& statement) +{ + if (!Accept(HLSLToken_Technique)) { + return false; + } + + const char* techniqueName = NULL; + if (!ExpectIdentifier(techniqueName)) + { + return false; + } + + if (!Expect('{')) + { + return false; + } + + HLSLTechnique* technique = m_tree->AddNode(GetFileName(), GetLineNumber()); + technique->name = techniqueName; + + //m_techniques.PushBack(technique); + + HLSLPass* lastPass = NULL; + + // Parse state assignments. + while (!Accept('}')) + { + if (CheckForUnexpectedEndOfStream('}')) + { + return false; + } + + HLSLPass* pass = NULL; + if (!ParsePass(pass)) + { + return false; + } + ASSERT(pass != NULL); + if (lastPass == NULL) + { + technique->passes = pass; + } + else + { + lastPass->nextPass = pass; + } + lastPass = pass; + technique->numPasses++; + } + + statement = technique; + return true; +} + +bool HLSLParser::ParsePass(HLSLPass*& pass) +{ + if (!Accept(HLSLToken_Pass)) { + return false; + } + + // Optional pass name. + const char* passName = NULL; + AcceptIdentifier(passName); + + if (!Expect('{')) + { + return false; + } + + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + pass = m_tree->AddNode(fileName, line); + pass->name = passName; + + HLSLStateAssignment* lastStateAssignment = NULL; + + // Parse state assignments. 
+ while (!Accept('}')) + { + if (CheckForUnexpectedEndOfStream('}')) + { + return false; + } + + HLSLStateAssignment* stateAssignment = NULL; + if (!ParseStateAssignment(stateAssignment, false, false)) + { + return false; + } + ASSERT(stateAssignment != NULL); + if (lastStateAssignment == NULL) + { + pass->stateAssignments = stateAssignment; + } + else + { + lastStateAssignment->nextStateAssignment = stateAssignment; + } + lastStateAssignment = stateAssignment; + pass->numStateAssignments++; + } + return true; +} + + +bool HLSLParser::ParsePipeline(HLSLStatement*& statement) +{ + if (!Accept("pipeline")) { + return false; + } + + // Optional pipeline name. + const char* pipelineName = NULL; + AcceptIdentifier(pipelineName); + + if (!Expect('{')) + { + return false; + } + + HLSLPipeline* pipeline = m_tree->AddNode(GetFileName(), GetLineNumber()); + pipeline->name = pipelineName; + + HLSLStateAssignment* lastStateAssignment = NULL; + + // Parse state assignments. + while (!Accept('}')) + { + if (CheckForUnexpectedEndOfStream('}')) + { + return false; + } + + HLSLStateAssignment* stateAssignment = NULL; + if (!ParseStateAssignment(stateAssignment, false, true)) + { + return false; + } + ASSERT(stateAssignment != NULL); + if (lastStateAssignment == NULL) + { + pipeline->stateAssignments = stateAssignment; + } + else + { + lastStateAssignment->nextStateAssignment = stateAssignment; + } + lastStateAssignment = stateAssignment; + pipeline->numStateAssignments++; + } + + statement = pipeline; + return true; +} + + +const EffectState* GetEffectState(const char* name, bool isSamplerState, bool isPipeline) +{ + const EffectState* validStates = effectStates; + int count = sizeof(effectStates)/sizeof(effectStates[0]); + + if (isPipeline) + { + validStates = pipelineStates; + count = sizeof(pipelineStates) / sizeof(pipelineStates[0]); + } + + if (isSamplerState) + { + validStates = samplerStates; + count = sizeof(samplerStates)/sizeof(samplerStates[0]); + } + + // Case 
insensitive comparison. + for (int i = 0; i < count; i++) + { + if (String_EqualNoCase(name, validStates[i].name)) + { + return &validStates[i]; + } + } + + return NULL; +} + +static const EffectStateValue* GetStateValue(const char* name, const EffectState* state) +{ + // Case insensitive comparison. + for (int i = 0; ; i++) + { + const EffectStateValue & value = state->values[i]; + if (value.name == NULL) break; + + if (String_EqualNoCase(name, value.name)) + { + return &value; + } + } + + return NULL; +} + + +bool HLSLParser::ParseStateName(bool isSamplerState, bool isPipelineState, const char*& name, const EffectState *& state) +{ + if (m_tokenizer.GetToken() != HLSLToken_Identifier) + { + char near[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(near); + m_tokenizer.Error("Syntax error: expected identifier near '%s'", near); + return false; + } + + state = GetEffectState(m_tokenizer.GetIdentifier(), isSamplerState, isPipelineState); + if (state == NULL) + { + m_tokenizer.Error("Syntax error: unexpected identifier '%s'", m_tokenizer.GetIdentifier()); + return false; + } + + m_tokenizer.Next(); + return true; +} + +bool HLSLParser::ParseColorMask(int& mask) +{ + mask = 0; + + do { + if (m_tokenizer.GetToken() == HLSLToken_IntLiteral) { + mask |= m_tokenizer.GetInt(); + } + else if (m_tokenizer.GetToken() == HLSLToken_Identifier) { + const char * ident = m_tokenizer.GetIdentifier(); + const EffectStateValue * stateValue = colorMaskValues; + while (stateValue->name != NULL) { + if (String_EqualNoCase(stateValue->name, ident)) { + mask |= stateValue->value; + break; + } + ++stateValue; + } + } + else { + return false; + } + m_tokenizer.Next(); + } while (Accept('|')); + + return true; +} + +bool HLSLParser::ParseStateValue(const EffectState * state, HLSLStateAssignment* stateAssignment) +{ + const bool expectsExpression = state->values == colorMaskValues; + const bool expectsInteger = state->values == integerValues; + const bool expectsFloat = 
state->values == floatValues; + const bool expectsBoolean = state->values == booleanValues; + + if (!expectsExpression && !expectsInteger && !expectsFloat && !expectsBoolean) + { + if (m_tokenizer.GetToken() != HLSLToken_Identifier) + { + char near[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(near); + m_tokenizer.Error("Syntax error: expected identifier near '%s'", near); + stateAssignment->iValue = 0; + return false; + } + } + + if (state->values == NULL) + { + if (strcmp(m_tokenizer.GetIdentifier(), "compile") != 0) + { + m_tokenizer.Error("Syntax error: unexpected identifier '%s' expected compile statement", m_tokenizer.GetIdentifier()); + stateAssignment->iValue = 0; + return false; + } + + // @@ Parse profile name, function name, argument expressions. + + // Skip the rest of the compile statement. + while(m_tokenizer.GetToken() != ';') + { + m_tokenizer.Next(); + } + } + else { + if (expectsInteger) + { + if (!AcceptInt(stateAssignment->iValue)) + { + m_tokenizer.Error("Syntax error: expected integer near '%s'", m_tokenizer.GetIdentifier()); + stateAssignment->iValue = 0; + return false; + } + } + else if (expectsFloat) + { + if (!AcceptFloat(stateAssignment->fValue)) + { + m_tokenizer.Error("Syntax error: expected float near '%s'", m_tokenizer.GetIdentifier()); + stateAssignment->iValue = 0; + return false; + } + } + else if (expectsBoolean) + { + const EffectStateValue * stateValue = GetStateValue(m_tokenizer.GetIdentifier(), state); + + if (stateValue != NULL) + { + stateAssignment->iValue = stateValue->value; + + m_tokenizer.Next(); + } + else if (AcceptInt(stateAssignment->iValue)) + { + stateAssignment->iValue = (stateAssignment->iValue != 0); + } + else { + m_tokenizer.Error("Syntax error: expected bool near '%s'", m_tokenizer.GetIdentifier()); + stateAssignment->iValue = 0; + return false; + } + } + else if (expectsExpression) + { + if (!ParseColorMask(stateAssignment->iValue)) + { + m_tokenizer.Error("Syntax error: expected color mask 
near '%s'", m_tokenizer.GetIdentifier()); + stateAssignment->iValue = 0; + return false; + } + } + else + { + // Expect one of the allowed values. + const EffectStateValue * stateValue = GetStateValue(m_tokenizer.GetIdentifier(), state); + + if (stateValue == NULL) + { + m_tokenizer.Error("Syntax error: unexpected value '%s' for state '%s'", m_tokenizer.GetIdentifier(), state->name); + stateAssignment->iValue = 0; + return false; + } + + stateAssignment->iValue = stateValue->value; + + m_tokenizer.Next(); + } + } + + return true; +} + +bool HLSLParser::ParseStateAssignment(HLSLStateAssignment*& stateAssignment, bool isSamplerState, bool isPipelineState) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + stateAssignment = m_tree->AddNode(fileName, line); + + const EffectState * state; + if (!ParseStateName(isSamplerState, isPipelineState, stateAssignment->stateName, state)) { + return false; + } + + //stateAssignment->name = m_tree->AddString(m_tokenizer.GetIdentifier()); + stateAssignment->stateName = state->name; + stateAssignment->d3dRenderState = state->d3drs; + + if (!Expect('=')) { + return false; + } + + if (!ParseStateValue(state, stateAssignment)) { + return false; + } + + if (!Expect(';')) { + return false; + } + + return true; +} +*/ + +bool HLSLParser::ParseAttributeList(HLSLAttribute*& firstAttribute) +{ + const char* fileName = GetFileName(); + int line = GetLineNumber(); + + HLSLAttribute* lastAttribute = firstAttribute; + do { + const char* identifier = NULL; + if (!ExpectIdentifier(identifier)) { + return false; + } + + HLSLAttribute* attribute = m_tree->AddNode(fileName, line); + + if (String_Equal(identifier, "unroll")) + attribute->attributeType = HLSLAttributeType_Unroll; + else if (String_Equal(identifier, "flatten")) + attribute->attributeType = HLSLAttributeType_Flatten; + else if (String_Equal(identifier, "branch")) + attribute->attributeType = HLSLAttributeType_Branch; + else if (String_Equal(identifier, 
"nofastmath")) + attribute->attributeType = HLSLAttributeType_NoFastMath; + + // @@ parse arguments, () not required if attribute constructor has no arguments. + + if (firstAttribute == NULL) { + firstAttribute = attribute; + } + else { + lastAttribute->nextAttribute = attribute; + } + lastAttribute = attribute; + + } while (Accept(',')); + + return true; +} + +// Attributes can have all these forms: +// [A] statement; +// [A,B] statement; +// [A][B] statement; +// These are not supported yet: +// [A] statement [B]; +// [A()] statement; +// [A(a)] statement; +bool HLSLParser::ParseAttributeBlock(HLSLAttribute*& attribute) +{ + HLSLAttribute** lastAttribute = &attribute; + while (*lastAttribute != NULL) { + lastAttribute = &(*lastAttribute)->nextAttribute; + } + + if (!Accept('[')) { + return false; + } + + // Parse list of attribute constructors. + ParseAttributeList(*lastAttribute); + + if (!Expect(']')) { + return false; + } + + // Parse additional [] blocks. + ParseAttributeBlock(*lastAttribute); + + return true; +} + +/* never completed +bool HLSLParser::ParseStage(HLSLStatement*& statement) +{ + if (!Accept("stage")) + { + return false; + } + + // Required stage name. + const char* stageName = NULL; + if (!ExpectIdentifier(stageName)) + { + return false; + } + + if (!Expect('{')) + { + return false; + } + + HLSLStage* stage = m_tree->AddNode(GetFileName(), GetLineNumber()); + stage->name = stageName; + + BeginScope(); + + HLSLType voidType(HLSLBaseType_Void); + if (!Expect('{') || !ParseBlock(stage->statement, voidType)) + { + return false; + } + + EndScope(); + + // @@ To finish the stage definition we should traverse the statements recursively (including function calls) and find all the input/output declarations. 
+ + statement = stage; + return true; +} +*/ + +bool HLSLParser::Parse(HLSLTree* tree, const HLSLParserOptions& options) +{ + m_tree = tree; + m_options = options; + + HLSLRoot* root = m_tree->GetRoot(); + HLSLStatement* lastStatement = NULL; + + while (!Accept(HLSLToken_EndOfStream)) { + HLSLStatement* statement = NULL; + if (!ParseTopLevel(statement)) { + return false; + } + if (statement != NULL) { + if (lastStatement == NULL) { + root->statement = statement; + } + else { + lastStatement->nextStatement = statement; + } + lastStatement = statement; + while (lastStatement->nextStatement) lastStatement = lastStatement->nextStatement; + } + } + return true; +} + +bool HLSLParser::AcceptTypeModifier(int& flags) +{ + if (Accept(HLSLToken_Const)) { + flags |= HLSLTypeFlag_Const; + return true; + } + else if (Accept(HLSLToken_Static)) { + flags |= HLSLTypeFlag_Static; + return true; + } + else if (Accept(HLSLToken_Uniform)) { + //flags |= HLSLTypeFlag_Uniform; // @@ Ignored. + return true; + } + else if (Accept(HLSLToken_Inline)) { + //flags |= HLSLTypeFlag_Uniform; // @@ Ignored. In HLSL all functions are inline. + return true; + } + /*else if (Accept("in")) + { + flags |= HLSLTypeFlag_Input; + return true; + } + else if (Accept("out")) + { + flags |= HLSLTypeFlag_Output; + return true; + }*/ + + // Not an usage keyword. 
+ return false; +} + +bool HLSLParser::AcceptInterpolationModifier(int& flags) +{ + if (Accept("linear")) { + flags |= HLSLTypeFlag_Linear; + return true; + } + else if (Accept("centroid")) { + flags |= HLSLTypeFlag_Centroid; + return true; + } + else if (Accept("nointerpolation")) { + flags |= HLSLTypeFlag_NoInterpolation; + return true; + } + else if (Accept("noperspective")) { + flags |= HLSLTypeFlag_NoPerspective; + return true; + } + else if (Accept("sample")) { + flags |= HLSLTypeFlag_Sample; + return true; + } + + return false; +} + +bool HLSLParser::AcceptType(bool allowVoid, HLSLType& type /*, bool acceptFlags*/) +{ + //if (type.flags != NULL) + { + type.flags = 0; + while (AcceptTypeModifier(type.flags) || AcceptInterpolationModifier(type.flags)) { + } + } + + int token = m_tokenizer.GetToken(); + + if (token == HLSLToken_Comment) { + // TODO: should this advance the tokenizer? + // m_tokenizer.Next(); + + type.baseType = HLSLBaseType_Comment; + return true; + } + + // Check built-in types. + type.baseType = HLSLBaseType_Void; + switch (token) { + case HLSLToken_Float: + type.baseType = HLSLBaseType_Float; + break; + case HLSLToken_Float2: + type.baseType = HLSLBaseType_Float2; + break; + case HLSLToken_Float3: + type.baseType = HLSLBaseType_Float3; + break; + case HLSLToken_Float4: + type.baseType = HLSLBaseType_Float4; + break; + + case HLSLToken_Float2x2: + type.baseType = HLSLBaseType_Float2x2; + break; + case HLSLToken_Float3x3: + type.baseType = HLSLBaseType_Float3x3; + break; + case HLSLToken_Float4x4: + type.baseType = HLSLBaseType_Float4x4; + break; + + // The parser is remapping the type here + case HLSLToken_Halfio: + type.baseType = m_options.isHalfio ? HLSLBaseType_Half : HLSLBaseType_Float; + break; + case HLSLToken_Half2io: + type.baseType = m_options.isHalfio ? HLSLBaseType_Half2 : HLSLBaseType_Float2; + break; + case HLSLToken_Half3io: + type.baseType = m_options.isHalfio ? 
HLSLBaseType_Half3 : HLSLBaseType_Float3; + break; + case HLSLToken_Half4io: + type.baseType = m_options.isHalfio ? HLSLBaseType_Half4 : HLSLBaseType_Float4; + break; + + // The parser is remapping the type here + case HLSLToken_Halfst: + type.baseType = m_options.isHalfst ? HLSLBaseType_Half : HLSLBaseType_Float; + break; + case HLSLToken_Half2st: + type.baseType = m_options.isHalfst ? HLSLBaseType_Half2 : HLSLBaseType_Float2; + break; + case HLSLToken_Half3st: + type.baseType = m_options.isHalfst ? HLSLBaseType_Half3 : HLSLBaseType_Float3; + break; + case HLSLToken_Half4st: + type.baseType = m_options.isHalfst ? HLSLBaseType_Half4 : HLSLBaseType_Float4; + break; + + case HLSLToken_Half: + type.baseType = HLSLBaseType_Half; + break; + case HLSLToken_Half2: + type.baseType = HLSLBaseType_Half2; + break; + case HLSLToken_Half3: + type.baseType = HLSLBaseType_Half3; + break; + case HLSLToken_Half4: + type.baseType = HLSLBaseType_Half4; + break; + + case HLSLToken_Half2x2: + type.baseType = HLSLBaseType_Half2x2; + break; + case HLSLToken_Half3x3: + type.baseType = HLSLBaseType_Half3x3; + break; + case HLSLToken_Half4x4: + type.baseType = HLSLBaseType_Half4x4; + break; + + case HLSLToken_Bool: + type.baseType = HLSLBaseType_Bool; + break; + case HLSLToken_Bool2: + type.baseType = HLSLBaseType_Bool2; + break; + case HLSLToken_Bool3: + type.baseType = HLSLBaseType_Bool3; + break; + case HLSLToken_Bool4: + type.baseType = HLSLBaseType_Bool4; + break; + + case HLSLToken_Int: + type.baseType = HLSLBaseType_Int; + break; + case HLSLToken_Int2: + type.baseType = HLSLBaseType_Int2; + break; + case HLSLToken_Int3: + type.baseType = HLSLBaseType_Int3; + break; + case HLSLToken_Int4: + type.baseType = HLSLBaseType_Int4; + break; + + case HLSLToken_Uint: + type.baseType = HLSLBaseType_Uint; + break; + case HLSLToken_Uint2: + type.baseType = HLSLBaseType_Uint2; + break; + case HLSLToken_Uint3: + type.baseType = HLSLBaseType_Uint3; + break; + case HLSLToken_Uint4: + type.baseType = 
HLSLBaseType_Uint4; + break; + + case HLSLToken_Ushort: + type.baseType = HLSLBaseType_Ushort; + break; + case HLSLToken_Ushort2: + type.baseType = HLSLBaseType_Ushort2; + break; + case HLSLToken_Ushort3: + type.baseType = HLSLBaseType_Ushort3; + break; + case HLSLToken_Ushort4: + type.baseType = HLSLBaseType_Ushort4; + break; + + case HLSLToken_Short: + type.baseType = HLSLBaseType_Short; + break; + case HLSLToken_Short2: + type.baseType = HLSLBaseType_Short2; + break; + case HLSLToken_Short3: + type.baseType = HLSLBaseType_Short3; + break; + case HLSLToken_Short4: + type.baseType = HLSLBaseType_Short4; + break; + + // Textures (TODO: could have baseType be texture, with subtype like buffer) + case HLSLToken_Texture2D: + type.baseType = HLSLBaseType_Texture2D; + break; + case HLSLToken_Texture2DArray: + type.baseType = HLSLBaseType_Texture2DArray; + break; + case HLSLToken_Texture3D: + type.baseType = HLSLBaseType_Texture3D; + break; + case HLSLToken_TextureCube: + type.baseType = HLSLBaseType_TextureCube; + break; + case HLSLToken_Texture2DMS: + type.baseType = HLSLBaseType_Texture2DMS; + break; + case HLSLToken_TextureCubeArray: + type.baseType = HLSLBaseType_TextureCubeArray; + break; + + case HLSLToken_Depth2D: + type.baseType = HLSLBaseType_Depth2D; + break; + case HLSLToken_Depth2DArray: + type.baseType = HLSLBaseType_Depth2DArray; + break; + case HLSLToken_DepthCube: + type.baseType = HLSLBaseType_DepthCube; + break; + + case HLSLToken_RWTexture2D: + type.baseType = HLSLBaseType_RWTexture2D; + break; + + // samplers + case HLSLToken_SamplerState: + type.baseType = HLSLBaseType_SamplerState; + break; + case HLSLToken_SamplerComparisonState: + type.baseType = HLSLBaseType_SamplerComparisonState; + break; + + // older constants + case HLSLToken_CBuffer: + case HLSLToken_TBuffer: + // might make these BufferGlobals? 
+ type.baseType = HLSLBaseType_Buffer; + break; + + // SSBO + case HLSLToken_StructuredBuffer: + case HLSLToken_RWStructuredBuffer: + case HLSLToken_ByteAddressBuffer: + case HLSLToken_RWByteAddressBuffer: + case HLSLToken_ConstantBuffer: + type.baseType = HLSLBaseType_Buffer; + break; + } + if (type.baseType != HLSLBaseType_Void) { + m_tokenizer.Next(); + + if (IsTextureType(type.baseType)) { + // Parse optional sampler type. + if (Accept('<')) { + token = m_tokenizer.GetToken(); + + // TODO: need more format types + // TODO: double, u/long, and other types + if (token >= HLSLToken_Float && token <= HLSLToken_Float4) { + // TODO: code only tests if texture formatType exactly matches + // when looking for Intrinsics, need to fix that before changing + // this. + + type.formatType = HLSLBaseType_Float; + // (HLSLBaseType)(HLSLBaseType_Float + (token - HLSLToken_Float)); + } + else if (token >= HLSLToken_Half && token <= HLSLToken_Half4) { + type.formatType = HLSLBaseType_Half; + // (HLSLBaseType)(HLSLBaseType_Half + (token - HLSLToken_Half)); + } + else { + m_tokenizer.Error("Expected half or float format type on texture."); + return false; + } + m_tokenizer.Next(); + + if (!Expect('>')) { + return false; + } + } + } + return true; + } + + if (allowVoid && Accept(HLSLToken_Void)) { + type.baseType = HLSLBaseType_Void; + return true; + } + if (token == HLSLToken_Identifier) { + const char* identifier = m_tree->AddString(m_tokenizer.GetIdentifier()); + if (FindUserDefinedType(identifier) != NULL) { + m_tokenizer.Next(); + + type.baseType = HLSLBaseType_UserDefined; + type.typeName = identifier; + return true; + } + } + return false; +} + +bool HLSLParser::ExpectType(bool allowVoid, HLSLType& type) +{ + if (!AcceptType(allowVoid, type)) { + m_tokenizer.Error("Expected type"); + return false; + } + return true; +} + +bool HLSLParser::AcceptDeclaration(bool allowUnsizedArray, HLSLType& type, const char*& name) +{ + if (!AcceptType(/*allowVoid=*/false, type)) { + return 
false; + } + + if (!ExpectIdentifier(name)) { + // TODO: false means we didn't accept a declaration and we had an error! + return false; + } + // Handle array syntax. + if (Accept('[')) { + type.array = true; + // Optionally allow no size to the specified for the array. + if (Accept(']') && allowUnsizedArray) { + return true; + } + if (!ParseExpression(type.arraySize) || !Expect(']')) { + return false; + } + } + return true; +} + +bool HLSLParser::ExpectDeclaration(bool allowUnsizedArray, HLSLType& type, const char*& name) +{ + if (!AcceptDeclaration(allowUnsizedArray, type, name)) { + m_tokenizer.Error("Expected declaration"); + return false; + } + return true; +} + +const HLSLStruct* HLSLParser::FindUserDefinedType(const char* name) const +{ + // Pointer comparison is sufficient for strings since they exist in the + // string pool. + for (int i = 0; i < m_userTypes.GetSize(); ++i) { + if (m_userTypes[i]->name == name) { + return m_userTypes[i]; + } + } + return NULL; +} + +bool HLSLParser::CheckForUnexpectedEndOfStream(int endToken) +{ + if (Accept(HLSLToken_EndOfStream)) { + char what[HLSLTokenizer::s_maxIdentifier]; + m_tokenizer.GetTokenName(endToken, what); + m_tokenizer.Error("Unexpected end of file while looking for '%s'", what); + return true; + } + return false; +} + +int HLSLParser::GetLineNumber() const +{ + return m_tokenizer.GetLineNumber(); +} + +const char* HLSLParser::GetFileName() +{ + return m_tree->AddString(m_tokenizer.GetFileName()); +} + +void HLSLParser::BeginScope() +{ + // Use NULL as a sentinel that indices a new scope level. 
+ Variable& variable = m_variables.PushBackNew(); + variable.name = NULL; +} + +void HLSLParser::EndScope() +{ + int numVariables = m_variables.GetSize() - 1; + while (m_variables[numVariables].name != NULL) { + --numVariables; + ASSERT(numVariables >= 0); + } + m_variables.Resize(numVariables); +} + +const HLSLType* HLSLParser::FindVariable(const char* name, bool& global) const +{ + for (int i = m_variables.GetSize() - 1; i >= 0; --i) { + if (m_variables[i].name == name) { + global = (i < m_numGlobals); + return &m_variables[i].type; + } + } + return NULL; +} + +// This only search user-defined c-style functions. Intrinsics are not in this. +const HLSLFunction* HLSLParser::FindFunction(const char* name) const +{ + for (int i = 0; i < m_functions.GetSize(); ++i) { + if (m_functions[i]->name == name) { + return m_functions[i]; + } + } + return NULL; +} + +static bool AreTypesEqual(HLSLTree* tree, const HLSLType& lhs, const HLSLType& rhs) +{ + return GetTypeCastRank(tree, lhs, rhs) == 0; +} + +static bool AreArgumentListsEqual(HLSLTree* tree, HLSLArgument* lhs, HLSLArgument* rhs) +{ + while (lhs && rhs) { + if (!AreTypesEqual(tree, lhs->type, rhs->type)) + return false; + + if (lhs->modifier != rhs->modifier) + return false; + + if (lhs->semantic != rhs->semantic || lhs->sv_semantic != rhs->sv_semantic) + return false; + + lhs = lhs->nextArgument; + rhs = rhs->nextArgument; + } + + return lhs == NULL && rhs == NULL; +} + +const HLSLFunction* HLSLParser::FindFunction(const HLSLFunction* fun) const +{ + for (int i = 0; i < m_functions.GetSize(); ++i) { + if (m_functions[i]->name == fun->name && + AreTypesEqual(m_tree, m_functions[i]->returnType, fun->returnType) && + AreArgumentListsEqual(m_tree, m_functions[i]->argument, fun->argument)) { + return m_functions[i]; + } + } + return NULL; +} + +void HLSLParser::DeclareVariable(const char* name, const HLSLType& type) +{ + if (m_variables.GetSize() == m_numGlobals) { + ++m_numGlobals; + } + Variable& variable = 
m_variables.PushBackNew(); + variable.name = name; + variable.type = type; +} + +bool HLSLParser::GetIsFunction(const char* name) const +{ + // check user defined functions + for (int i = 0; i < m_functions.GetSize(); ++i) { + // == is ok here because we're passed the strings through the string pool. + if (m_functions[i]->name == name) { + return true; + } + } + + // see if it's an intrinsic + const auto& it = _intrinsicRangeMap.find(name); + return it != _intrinsicRangeMap.end(); +} + +const HLSLFunction* HLSLParser::MatchFunctionCall(const HLSLFunctionCall* functionCall, const char* name, const HLSLType* memberType) +{ + const HLSLFunction* matchedFunction = NULL; + + //int numArguments = functionCall->numArguments; + int numMatchedOverloads = 0; + bool nameMatches = false; + + // Get the user defined c functions with the specified name. + // There may be more than one, and these are not ordered. + for (int i = 0; i < m_functions.GetSize(); ++i) { + const HLSLFunction* function = m_functions[i]; + if (function->name == name) { + nameMatches = true; + + // if caller requests member function, then memberType must match + bool isMemberFunc = function->IsMemberFunction(); + + if (memberType) { + if (!isMemberFunc) + continue; + + if (memberType->baseType != function->memberType) + continue; + + if (memberType->formatType != GetScalarType(function->returnType.baseType)) + continue; + } + else { + if (isMemberFunc) + continue; + } + + CompareFunctionsResult result = CompareFunctions(m_tree, functionCall, function, matchedFunction); + if (result == Function1Better) { + matchedFunction = function; + numMatchedOverloads = 1; + } + else if (result == FunctionsEqual) { + ++numMatchedOverloads; + } + } + } + + // Get the intrinsic functions with the specified name. 
+ const auto& iter = _intrinsicRangeMap.find(name); + if (iter != _intrinsicRangeMap.end()) { + Range range = iter->second; + for (int i = 0; i < range.count; ++i) { + uint32_t idx = range.start + i; + const HLSLFunction* function = &_intrinsics[idx].function; + + // if caller requests member function, then memberType must match + bool isMemberFunc = function->IsMemberFunction(); + if (memberType) { + if (!isMemberFunc) + break; + + if (memberType->baseType != function->memberType) + continue; + + if (memberType->formatType != GetScalarType(function->returnType.baseType)) + continue; + } + else { + if (isMemberFunc) + break; + } + ASSERT(String_Equal(function->name, name)); + + nameMatches = true; + + CompareFunctionsResult result = CompareFunctions(m_tree, functionCall, function, matchedFunction); + if (result == Function1Better) { + matchedFunction = function; + numMatchedOverloads = 1; + } + else if (result == FunctionsEqual) { + ++numMatchedOverloads; + } + } + } + + if (matchedFunction != NULL && numMatchedOverloads > 1) { + // Multiple overloads match. 
+ m_tokenizer.Error("'%s' %d overloads have similar conversions", name, numMatchedOverloads); + return NULL; + } + else if (matchedFunction == NULL) { + if (nameMatches) { + m_tokenizer.Error("'%s' no overloaded function matched all of the arguments", name); + } + else { + m_tokenizer.Error("Undeclared identifier '%s'", name); + } + } + + return matchedFunction; +} + +inline bool IsSwizzle(char c) +{ + return c == 'x' || c == 'y' || c == 'z' || c == 'w' || + c == 'r' || c == 'g' || c == 'b' || c == 'a'; +} + +bool HLSLParser::GetMemberType(const HLSLType& objectType, HLSLMemberAccess* memberAccess) +{ + const char* fieldName = memberAccess->field; + + HLSLBaseType baseType = objectType.baseType; + + // pull field from struct + if (baseType == HLSLBaseType_UserDefined) { + const HLSLStruct* structure = FindUserDefinedType(objectType.typeName); + ASSERT(structure != NULL); + if (structure == NULL) + return false; + + const HLSLStructField* field = structure->field; + while (field != NULL) { + if (field->name == fieldName) { + memberAccess->expressionType = field->type; + return true; + } + field = field->nextField; + } + + return false; + } + + if (baseTypeDescriptions[objectType.baseType].numericType == NumericType_NaN) { + // Currently we don't have an non-numeric types that allow member access. + return false; + } + + int swizzleLength = 0; + + if (IsScalarType(baseType) || IsVectorType(baseType)) { + // Check for a swizzle on the scalar/vector types. + for (int i = 0; fieldName[i] != 0; ++i) { + if (!IsSwizzle(fieldName[i])) { + m_tokenizer.Error("Invalid swizzle '%s'", fieldName); + return false; + } + ++swizzleLength; + } + ASSERT(swizzleLength > 0); + if (swizzleLength == 0) + return false; + } + else if (IsMatrixType(baseType)) { + // Check for a matrix element access (e.g. 
_m00 or _11) + + const char* n = fieldName; + while (n[0] == '_') { + ++n; + int base = 1; + if (n[0] == 'm') { + base = 0; + ++n; + } + if (!isdigit(n[0]) || !isdigit(n[1])) { + m_tokenizer.Error("Invalid matrix digit"); + return false; + } + + int r = (n[0] - '0') - base; + int c = (n[1] - '0') - base; + if (r >= baseTypeDescriptions[objectType.baseType].height) { + m_tokenizer.Error("Invalid matrix dimension %d", r); + return false; + } + if (c >= baseTypeDescriptions[objectType.baseType].numComponents) { + m_tokenizer.Error("Invalid matrix dimension %d", c); + return false; + } + ++swizzleLength; + n += 2; + } + + if (n[0] != 0) { + return false; + } + } + else { + return false; + } + + if (swizzleLength > 4) { + m_tokenizer.Error("Invalid swizzle '%s'", fieldName); + return false; + } + + switch (baseTypeDescriptions[objectType.baseType].numericType) { + case NumericType_Float: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Float + swizzleLength - 1); + break; + case NumericType_Half: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Half + swizzleLength - 1); + break; + case NumericType_Double: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Double + swizzleLength - 1); + break; + + case NumericType_Int: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Int + swizzleLength - 1); + break; + case NumericType_Uint: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Uint + swizzleLength - 1); + break; + case NumericType_Bool: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Bool + swizzleLength - 1); + break; + case NumericType_Short: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Short + swizzleLength - 1); + break; + case NumericType_Ushort: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Ushort + swizzleLength - 1); + break; + case NumericType_Long: + memberAccess->expressionType.baseType = 
(HLSLBaseType)(HLSLBaseType_Long + swizzleLength - 1); + break; + case NumericType_Ulong: + memberAccess->expressionType.baseType = (HLSLBaseType)(HLSLBaseType_Ulong + swizzleLength - 1); + break; + // TODO: u/char + default: + ASSERT(false); + } + + memberAccess->swizzle = true; + + return true; +} + +} //namespace M4 diff --git a/hlslparser/src/HLSLParser.h b/hlslparser/src/HLSLParser.h new file mode 100644 index 00000000..f963babf --- /dev/null +++ b/hlslparser/src/HLSLParser.h @@ -0,0 +1,219 @@ +//============================================================================= +// +// Render/HLSLParser.h +// +// Created by Max McGuire (max@unknownworlds.com) +// Copyright (c) 2013, Unknown Worlds Entertainment, Inc. +// +//============================================================================= + +#pragma once + +#include "Engine.h" +#include "HLSLTokenizer.h" +#include "HLSLTree.h" + +namespace M4 { + +struct EffectState; + +// This wouldn't be needed if could preprocess prior to calling parser. +struct HLSLParserOptions { + bool isHalfst = false; + + bool isHalfio = false; +}; + +class HLSLParser { +public: + HLSLParser(Allocator* allocator, const char* fileName, const char* buffer, size_t length); + void SetKeepComments(bool enable) { m_tokenizer.SetKeepComments(enable); } + + bool Parse(HLSLTree* tree, const HLSLParserOptions& options = HLSLParserOptions()); + +private: + bool Accept(int token); + bool Expect(int token); + + /** + * Special form of Accept for accepting a word that is not actually a token + * but will be treated like one. This is useful for HLSL keywords that are + * only tokens in specific contexts (like in/inout in parameter lists). 
+ */ + bool Accept(const char* token); + bool Expect(const char* token); + + bool AcceptIdentifier(const char*& identifier); + bool ExpectIdentifier(const char*& identifier); + bool AcceptFloat(float& value); + bool AcceptHalf(float& value); + bool AcceptInt(int& value); + bool AcceptType(bool allowVoid, HLSLType& type); + bool ExpectType(bool allowVoid, HLSLType& type); + bool AcceptBinaryOperator(int priority, HLSLBinaryOp& binaryOp); + bool AcceptUnaryOperator(bool pre, HLSLUnaryOp& unaryOp); + bool AcceptAssign(HLSLBinaryOp& binaryOp); + bool AcceptTypeModifier(int& typeFlags); + bool AcceptInterpolationModifier(int& flags); + + /** + * Handles a declaration like: "float2 name[5]". If allowUnsizedArray is true, it is + * is acceptable for the declaration to not specify the bounds of the array (i.e. name[]). + */ + bool AcceptDeclaration(bool allowUnsizedArray, HLSLType& type, const char*& name); + bool ExpectDeclaration(bool allowUnsizedArray, HLSLType& type, const char*& name); + + bool ParseTopLevel(HLSLStatement*& statement); + bool ParseBlock(HLSLStatement*& firstStatement, const HLSLType& returnType); + bool ParseStatementOrBlock(HLSLStatement*& firstStatement, const HLSLType& returnType, bool scoped = true); + bool ParseStatement(HLSLStatement*& statement, const HLSLType& returnType); + bool ParseDeclaration(HLSLDeclaration*& declaration); + bool ParseFieldDeclaration(HLSLStructField*& field); + //bool ParseBufferFieldDeclaration(HLSLBufferField*& field); + bool ParseExpression(HLSLExpression*& expression); + bool ParseBinaryExpression(int priority, HLSLExpression*& expression); + bool ParseTerminalExpression(HLSLExpression*& expression, bool& needsEndParen); + bool ParseExpressionList(int endToken, bool allowEmptyEnd, HLSLExpression*& firstExpression, int& numExpressions); + bool ParseArgumentList(HLSLArgument*& firstArgument, int& numArguments, int& numOutputArguments); + bool ParseDeclarationAssignment(HLSLDeclaration* declaration); + bool 
ParsePartialConstructor(HLSLExpression*& expression, HLSLBaseType type, const char* typeName); + + bool ParseStateName(bool isSamplerState, bool isPipelineState, const char*& name, const EffectState*& state); + bool ParseColorMask(int& mask); + + // FX file + // bool ParseStateValue(const EffectState * state, HLSLStateAssignment* stateAssignment); + // bool ParseStateAssignment(HLSLStateAssignment*& stateAssignment, bool isSamplerState, bool isPipelineState); + // bool ParseSamplerState(HLSLExpression*& expression); + // bool ParseTechnique(HLSLStatement*& statement); + // bool ParsePass(HLSLPass*& pass); + // bool ParsePipeline(HLSLStatement*& pipeline); + // bool ParseStage(HLSLStatement*& stage); + + bool ParseComment(HLSLStatement*& statement); + + bool ParseAttributeList(HLSLAttribute*& attribute); + bool ParseAttributeBlock(HLSLAttribute*& attribute); + + bool CheckForUnexpectedEndOfStream(int endToken); + + const HLSLStruct* FindUserDefinedType(const char* name) const; + + void BeginScope(); + void EndScope(); + + void DeclareVariable(const char* name, const HLSLType& type); + + /// Returned pointer is only valid until Declare or Begin/EndScope is called. + const HLSLType* FindVariable(const char* name, bool& global) const; + + const HLSLFunction* FindFunction(const char* name) const; + const HLSLFunction* FindFunction(const HLSLFunction* fun) const; + + bool GetIsFunction(const char* name) const; + + /// Finds the overloaded function that matches the specified call. + /// Pass memberType to match member functions. + const HLSLFunction* MatchFunctionCall(const HLSLFunctionCall* functionCall, const char* name, const HLSLType* memberType = NULL); + + /// Gets the type of the named field on the specified object type (fieldName can also specify a swizzle. 
) + bool GetMemberType(const HLSLType& objectType, HLSLMemberAccess* memberAccess); + + bool CheckTypeCast(const HLSLType& srcType, const HLSLType& dstType); + + const char* GetFileName(); + int GetLineNumber() const; + +private: + struct Variable { + const char* name; + HLSLType type; + }; + + HLSLTokenizer m_tokenizer; + Array m_userTypes; + Array m_variables; + Array m_functions; + int m_numGlobals; + + HLSLTree* m_tree; + + bool m_allowUndeclaredIdentifiers = false; + bool m_disableSemanticValidation = false; + + HLSLParserOptions m_options; +}; + +enum NumericType { + NumericType_Float, + NumericType_Half, + NumericType_Double, // not in MSL + + NumericType_Bool, + NumericType_Int, + NumericType_Uint, + NumericType_Short, + NumericType_Ushort, + NumericType_Ulong, // NOTE(review): Ulong precedes Long here, while the reserved-word and token lists use long-then-ulong; confirm any table indexed by NumericType agrees + NumericType_Long, + + // TODO: HLSL doesn't have byte/ubyte, MSL does + // NumericType_UByte, + // NumericType_Byte, + + NumericType_Count, + + NumericType_NaN, // not in count? +}; + +bool IsHalf(HLSLBaseType type); +bool IsFloat(HLSLBaseType type); +bool IsDouble(HLSLBaseType type); + +bool IsInt(HLSLBaseType type); +bool IsUnit(HLSLBaseType type); // NOTE(review): "IsUnit" is presumably a typo for "IsUint"; confirm against the definition before renaming +bool IsShort(HLSLBaseType type); +bool IsUshort(HLSLBaseType type); +bool IsLong(HLSLBaseType type); +bool IsUlong(HLSLBaseType type); +bool IsBool(HLSLBaseType type); + +bool IsSamplerType(HLSLBaseType baseType); +bool IsMatrixType(HLSLBaseType baseType); +bool IsVectorType(HLSLBaseType baseType); +bool IsScalarType(HLSLBaseType baseType); +bool IsTextureType(HLSLBaseType baseType); +bool IsDepthTextureType(HLSLBaseType baseType); +bool IsBufferType(HLSLBaseType baseType); +bool IsNumericType(HLSLBaseType baseType); + +bool IsFloatingType(HLSLBaseType type); +bool IsIntegerType(HLSLBaseType type); + +bool IsCoreTypeEqual(HLSLBaseType lhsType, HLSLBaseType rhsType); +bool IsNumericTypeEqual(HLSLBaseType lhsType, HLSLBaseType rhsType); +bool IsDimensionEqual(HLSLBaseType lhsType, HLSLBaseType rhsType); +bool IsCrossDimensionEqual(HLSLBaseType lhsType, 
HLSLBaseType rhsType); + +bool IsScalarType(const HLSLType& type); +bool IsVectorType(const HLSLType& type); +bool IsMatrixType(const HLSLType& type); + +bool IsSamplerType(const HLSLType& type); +bool IsTextureType(const HLSLType& type); + +HLSLBaseType PromoteType(HLSLBaseType toType, HLSLBaseType type); +HLSLBaseType HalfToFloatBaseType(HLSLBaseType type); +HLSLBaseType DoubleToFloatBaseType(HLSLBaseType type); + +const char* GetNumericTypeName(HLSLBaseType type); + +const char* GetTypeNameHLSL(const HLSLType& type); +const char* GetTypeNameMetal(const HLSLType& type); + +HLSLBaseType GetScalarType(HLSLBaseType type); + +// returns 1 for scalar or 2/3/4 for vector types. +int32_t GetVectorDimension(HLSLBaseType type); + +} //namespace M4 diff --git a/hlslparser/src/HLSLTokenizer.cpp b/hlslparser/src/HLSLTokenizer.cpp new file mode 100644 index 00000000..c8d89983 --- /dev/null +++ b/hlslparser/src/HLSLTokenizer.cpp @@ -0,0 +1,741 @@ +#include "HLSLTokenizer.h" + +#include +#include +#include +#include +#include + +#include "Engine.h" + +namespace M4 { +// The order here must match the order in the Token enum. 
+static const char* _reservedWords[] = + { + "float", + "float2", + "float3", + "float4", + "float2x2", + "float3x3", + "float4x4", + + // for Nvidia/Adreno + "halfio", + "half2io", + "half3io", + "half4io", + + // for Android + "halfst", + "half2st", + "half3st", + "half4st", + + "half", + "half2", + "half3", + "half4", + "half2x2", + "half3x3", + "half4x4", + + "double", + "double2", + "double3", + "double4", + "double2x2", + "double3x3", + "double4x4", + + "bool", + "bool2", + "bool3", + "bool4", + + "int", + "int2", + "int3", + "int4", + + "uint", + "uint2", + "uint3", + "uint4", + + "short", + "short2", + "short3", + "short4", + + "ushort", + "ushort2", + "ushort3", + "ushort4", + + "long", + "long2", + "long3", + "long4", + + "ulong", + "ulong2", + "ulong3", + "ulong4", + + // TODO: u/char + + "Texture2D", + "Texture3D", + "TextureCube", + "Texture2DArray", + "TextureCubeArray", + "Texture2DMS", + + "Depth2D", + "Depth2DArray", // cascades + "DepthCube", + + "RWTexture2D", + + "SamplerState", + "SamplerComparisonState", + + "if", + "else", + "for", + "while", + "break", + "true", + "false", + "void", + "struct", + + // DX9 buffer types (tons of globals) + "cbuffer", + "tbuffer", + + // DX10 buffer templated types + "ConstantBuffer", // indexable cbuffer + "StructuredBuffer", + "RWStructuredBuffer", + "ByteAddressBuffer", + "RWByteAddressBuffer", + + "register", + "return", + "continue", + "discard", + + "const", + "static", + "inline", + + "uniform", + "in", + "out", + "inout", + + "#include", + + // these are from fx file + //"sampler_state", + //"technique", + //"pass", +}; + +static bool GetIsSymbol(char c) +{ + switch (c) { + case ';': + case ':': + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '-': + case '+': + case '*': + case '/': + case '?': + case '!': + case ',': + case '=': + case '.': + case '<': + case '>': + case '|': + case '&': + case '^': + case '~': + case '@': + return true; + } + return false; +} + +/** 
Returns true if the character is a valid token separator at the end of a number type token */ +static bool GetIsNumberSeparator(char c) +{ + return c == 0 || isspace(c) || GetIsSymbol(c); +} + +HLSLTokenizer::HLSLTokenizer(const char* fileName, const char* buffer, size_t length) +{ + m_buffer = buffer; + m_bufferEnd = buffer + length; + m_fileName = fileName; + m_lineNumber = 1; + m_tokenLineNumber = 1; + m_error = false; + Next(); +} + +void HLSLTokenizer::Next() +{ + while (SkipWhitespace() || SkipComment() || ScanLineDirective() || SkipPragmaDirective() || SkipInclude()) { + } + + if (m_error) { + m_token = HLSLToken_EndOfStream; + return; + } + + m_tokenLineNumber = m_lineNumber; + + if (m_buffer >= m_bufferEnd || *m_buffer == '\0') { + m_token = HLSLToken_EndOfStream; + return; + } + + const char* start = m_buffer; + + // single line comments + if (m_keepComments && (m_buffer[0] == '/' && m_buffer[1] == '/')) { + m_token = HLSLToken_Comment; + m_buffer += 2; + + m_comment[0] = 0; + + // How to count the remaining string as tokens of the comment + // typically expecting a single string, not a sequence of strings. 
+ + // skip the newline too, but would need to increment lineNumber + uint32_t commentLen = 0; + while (m_buffer < m_bufferEnd) { + if (*(m_buffer) == '\n') { + m_buffer++; + m_lineNumber++; + break; + } + + // store comment to temporary string + if (commentLen < (s_maxComment - 1)) + m_comment[commentLen++] = *m_buffer; + + m_buffer++; + } + + m_comment[commentLen] = 0; + + return; + } + + // +=, -=, *=, /=, ==, <=, >= + if (m_buffer[0] == '+' && m_buffer[1] == '=') { + m_token = HLSLToken_PlusEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '-' && m_buffer[1] == '=') { + m_token = HLSLToken_MinusEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '*' && m_buffer[1] == '=') { + m_token = HLSLToken_TimesEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '/' && m_buffer[1] == '=') { + m_token = HLSLToken_DivideEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '=' && m_buffer[1] == '=') { + m_token = HLSLToken_EqualEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '!' && m_buffer[1] == '=') { + m_token = HLSLToken_NotEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '<' && m_buffer[1] == '=') { + m_token = HLSLToken_LessEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '>' && m_buffer[1] == '=') { + m_token = HLSLToken_GreaterEqual; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '&' && m_buffer[1] == '&') { + m_token = HLSLToken_LogicalAnd; + m_buffer += 2; + return; + } + else if (m_buffer[0] == '|' && m_buffer[1] == '|') { + m_token = HLSLToken_LogicalOr; + m_buffer += 2; + return; + } + + // ++, -- + if ((m_buffer[0] == '-' && m_buffer[1] == '-')) { + m_token = HLSLToken_MinusMinus; + m_buffer += 2; + return; + } + if ((m_buffer[0] == '+' && m_buffer[1] == '+')) { + m_token = HLSLToken_PlusPlus; + m_buffer += 2; + return; + } + + // Check for the start of a number. 
+ if (ScanNumber()) { + return; + } + + if (GetIsSymbol(m_buffer[0])) { + m_token = static_cast(m_buffer[0]); + ++m_buffer; + return; + } + + // Must be an identifier or a reserved word. + while (m_buffer < m_bufferEnd && m_buffer[0] != 0 && !GetIsSymbol(m_buffer[0]) && !isspace(m_buffer[0])) { + ++m_buffer; + } + size_t length = m_buffer - start; + if (length >= (size_t)s_maxIdentifier) length = (size_t)s_maxIdentifier - 1; // truncate: m_identifier holds s_maxIdentifier chars including the terminator, so the copy below must not exceed that + memcpy(m_identifier, start, length); + m_identifier[length] = 0; + + const int numReservedWords = sizeof(_reservedWords) / sizeof(const char*); + for (int i = 0; i < numReservedWords; ++i) { + // TODO: remove O(N) search of strings, need unordered_map + if (String_Equal(_reservedWords[i], m_identifier)) { + m_token = 256 + i; + return; + } + } + + m_token = HLSLToken_Identifier; +} + +bool HLSLTokenizer::SkipInclude() +{ + bool result = false; + + static const char* keyword = "#include"; + static uint32_t keywordLen = (uint32_t)strlen(keyword); + + if (strncmp(m_buffer, keyword, keywordLen) == 0 && isspace(m_buffer[keywordLen])) { + m_buffer += keywordLen; + result = true; + while (m_buffer < m_bufferEnd) { + if (*(m_buffer++) == '\n') { + ++m_lineNumber; + break; + } + } + } + return result; +} + +bool HLSLTokenizer::SkipWhitespace() +{ + bool result = false; + while (m_buffer < m_bufferEnd && isspace(m_buffer[0])) { + result = true; + if (m_buffer[0] == '\n') { + ++m_lineNumber; + } + ++m_buffer; + } + return result; +} + +bool HLSLTokenizer::SkipComment() +{ + bool result = false; + if (m_buffer[0] == '/') { + if ((!m_keepComments) && m_buffer[1] == '/') { + // Single line comment. + result = true; + m_buffer += 2; + while (m_buffer < m_bufferEnd) { + if (*(m_buffer++) == '\n') { + ++m_lineNumber; + break; + } + } + } + else if (m_buffer[1] == '*') { + // Multi-line comment. 
+ result = true; + m_buffer += 2; + while (m_buffer < m_bufferEnd) { + if (m_buffer[0] == '\n') { + ++m_lineNumber; + } + if (m_buffer[0] == '*' && m_buffer[1] == '/') { + break; + } + ++m_buffer; + } + if (m_buffer < m_bufferEnd) { + m_buffer += 2; + } + } + } + return result; +} + +bool HLSLTokenizer::SkipPragmaDirective() +{ + bool result = false; + + static const char* keyword = "#pragma"; // the whole #pragma line is skipped; #include lines are handled by SkipInclude() + static uint32_t keywordLen = (uint32_t)strlen(keyword); + + if (strncmp(m_buffer, keyword, keywordLen) == 0 && isspace(m_buffer[keywordLen])) { + m_buffer += keywordLen; + result = true; + while (m_buffer < m_bufferEnd) { + if (*(m_buffer++) == '\n') { + ++m_lineNumber; + break; + } + } + } + + return result; +} + +bool HLSLTokenizer::ScanNumber() +{ + // Don't treat the + or - as part of the number. + if (m_buffer[0] == '+' || m_buffer[0] == '-') { + return false; + } + + // Parse hex literals. + if (m_bufferEnd - m_buffer > 2 && m_buffer[0] == '0' && m_buffer[1] == 'x') { + char* hEnd = NULL; + int iValue = (int)String_ToIntHex(m_buffer + 2, &hEnd); + if (GetIsNumberSeparator(hEnd[0])) { + m_buffer = hEnd; + m_token = HLSLToken_IntLiteral; // TODO: handle uint, etc. + m_iValue = iValue; + return true; + } + } + + char* fEnd = NULL; + double fValue = String_ToDouble(m_buffer, &fEnd); + + if (fEnd == m_buffer) { + return false; + } + + char* iEnd = NULL; + int iValue = String_ToInt(m_buffer, &iEnd); + + // TODO: handle lf, etc. Double not really worth adding, since it's + // so hobbled. + + // If the character after the number is an f or h then it is treated as part + // of the number (to handle 1.0f / 1.0h syntax). The bound is tested before fEnd is read. + bool isHalf = false; + if (fEnd < m_bufferEnd && (fEnd[0] == 'f' || fEnd[0] == 'h')) { + isHalf = fEnd[0] == 'h'; + ++fEnd; + } + + if (fEnd > iEnd && GetIsNumberSeparator(fEnd[0])) { + m_buffer = fEnd; + m_token = (isHalf || fEnd[0] == 'h') ? 
HLSLToken_HalfLiteral : HLSLToken_FloatLiteral; + m_fValue = static_cast(fValue); + return true; + } + else if (iEnd > m_buffer && GetIsNumberSeparator(iEnd[0])) { + m_buffer = iEnd; + m_token = HLSLToken_IntLiteral; // TODO: uint/short/ushort + m_iValue = iValue; + return true; + } + + return false; +} + +bool HLSLTokenizer::ScanLineDirective() +{ + static const char* keyword = "#line"; + static uint32_t keywordLen = (uint32_t)strlen(keyword); + + if (strncmp(m_buffer, keyword, keywordLen) == 0 && isspace(m_buffer[keywordLen])) { + m_buffer += keywordLen; + + while (m_buffer < m_bufferEnd && isspace(m_buffer[0])) { + if (m_buffer[0] == '\n') { + Error("Syntax error: expected line number after #line"); + return false; + } + ++m_buffer; + } + + char* iEnd = NULL; + int lineNumber = String_ToInt(m_buffer, &iEnd); + + if (!isspace(*iEnd)) { + Error("Syntax error: expected line number after #line"); + return false; + } + + m_buffer = iEnd; + while (m_buffer < m_bufferEnd && isspace(m_buffer[0])) { + char c = m_buffer[0]; + ++m_buffer; + if (c == '\n') { + m_lineNumber = lineNumber; + return true; + } + } + + if (m_buffer >= m_bufferEnd) { + m_lineNumber = lineNumber; + return true; + } + + if (m_buffer[0] != '"') { + Error("Syntax error: expected '\"' after line number near #line"); + return false; + } + + ++m_buffer; + + int i = 0; + while (i + 1 < s_maxIdentifier && m_buffer < m_bufferEnd && m_buffer[0] != '"') { + if (m_buffer[0] == '\n') { + Error("Syntax error: expected '\"' before end of line near #line"); + return false; + } + + m_lineDirectiveFileName[i] = *m_buffer; + ++m_buffer; + ++i; + } + + m_lineDirectiveFileName[i] = 0; + + if (m_buffer >= m_bufferEnd) { + Error("Syntax error: expected '\"' before end of file near #line"); + return false; + } + + if (i + 1 >= s_maxIdentifier) { + Error("Syntax error: file name too long near #line"); + return false; + } + + // Skip the closing quote + ++m_buffer; + + while (m_buffer < m_bufferEnd && m_buffer[0] != '\n') { 
+ if (!isspace(m_buffer[0])) { + Error("Syntax error: unexpected input after file name near #line"); + return false; + } + ++m_buffer; + } + + // Skip new line, unless the input ended right after the file name + if (m_buffer < m_bufferEnd) ++m_buffer; + + m_lineNumber = lineNumber; + m_fileName = m_lineDirectiveFileName; + + return true; + } + + return false; +} + +int HLSLTokenizer::GetToken() const +{ + return m_token; +} + +float HLSLTokenizer::GetFloat() const +{ + return m_fValue; +} + +int HLSLTokenizer::GetInt() const +{ + return m_iValue; +} + +const char* HLSLTokenizer::GetIdentifier() const +{ + return m_identifier; +} + +const char* HLSLTokenizer::GetComment() const +{ + return m_comment; +} + +int HLSLTokenizer::GetLineNumber() const +{ + return m_tokenLineNumber; +} + +const char* HLSLTokenizer::GetFileName() const +{ + return m_fileName; +} + +void HLSLTokenizer::Error(const char* format, ...) +{ + // It's not always convenient to stop executing when an error occurs, + // so just track once we've hit an error and stop reporting them until + // we successfully bail out of execution. + if (m_error) { + return; + } + m_error = true; + + va_list args; + va_start(args, format); + Log_ErrorArgList(format, args, m_fileName, m_lineNumber); + va_end(args); + + // can log error/warning/info messages + //bool isError = true; + + // Gcc/clang convention (must be absolute filename for clickthrough) + // Visual Studio can pick up on this formatting too + //Log_Error("%s:%d: %s: %s\n", m_fileName, m_lineNumber, isError ? 
"error" : "warning", buffer); +} + +void HLSLTokenizer::GetTokenName(char buffer[s_maxIdentifier]) const +{ + if (m_token == HLSLToken_FloatLiteral || m_token == HLSLToken_HalfLiteral) { + snprintf(buffer, s_maxIdentifier, "%f", m_fValue); + + String_StripTrailingFloatZeroes(buffer); + } + else if (m_token == HLSLToken_IntLiteral) { + snprintf(buffer, s_maxIdentifier, "%d", m_iValue); + } + // TODO: short/ushort/uint + else if (m_token == HLSLToken_Identifier) { + String_Copy(buffer, m_identifier, s_maxIdentifier); + } + else { + GetTokenName(m_token, buffer); + } +} + +void HLSLTokenizer::GetTokenName(int token, char buffer[s_maxIdentifier]) +{ + // ascii + if (token < 256) { + buffer[0] = (char)token; + buffer[1] = 0; + } + else if (token < HLSLToken_LessEqual) { + strcpy(buffer, _reservedWords[token - 256]); + } + else { + switch (token) { + case HLSLToken_PlusPlus: + strcpy(buffer, "++"); + break; + case HLSLToken_MinusMinus: + strcpy(buffer, "--"); + break; + + case HLSLToken_PlusEqual: + strcpy(buffer, "+="); + break; + case HLSLToken_MinusEqual: + strcpy(buffer, "-="); + break; + case HLSLToken_TimesEqual: + strcpy(buffer, "*="); + break; + case HLSLToken_DivideEqual: + strcpy(buffer, "/="); + break; + + // DONE: Missing several token types + case HLSLToken_LessEqual: + strcpy(buffer, "<="); + break; + case HLSLToken_GreaterEqual: + strcpy(buffer, ">="); + break; + case HLSLToken_EqualEqual: + strcpy(buffer, "=="); + break; + case HLSLToken_NotEqual: + strcpy(buffer, "!="); + break; + + case HLSLToken_LogicalAnd: + strcpy(buffer, "&&"); + break; + case HLSLToken_LogicalOr: + strcpy(buffer, "||"); + break; + + // literals + case HLSLToken_HalfLiteral: + strcpy(buffer, "half"); + break; + case HLSLToken_FloatLiteral: + strcpy(buffer, "float"); + break; + case HLSLToken_IntLiteral: + strcpy(buffer, "int"); + break; + // TODO: need uint, short, ushort + + case HLSLToken_Identifier: + strcpy(buffer, "identifier"); + break; + case HLSLToken_EndOfStream: + 
strcpy(buffer, ""); + break; + + case HLSLToken_Comment: + strcpy(buffer, "comment"); + break; + + default: + strcpy(buffer, "unknown"); + break; + } + } +} + +} //namespace M4 diff --git a/hlslparser/src/HLSLTokenizer.h b/hlslparser/src/HLSLTokenizer.h new file mode 100644 index 00000000..b48d451a --- /dev/null +++ b/hlslparser/src/HLSLTokenizer.h @@ -0,0 +1,250 @@ +#pragma once + +#include "Engine.h" + +namespace M4 { + +/** In addition to the values in this enum, all of the ASCII characters are +valid tokens. */ +enum HLSLToken { + // The order here must match the order in the _reservedWords + + // Built-in types. + HLSLToken_Float = 256, + HLSLToken_Float2, + HLSLToken_Float3, + HLSLToken_Float4, + HLSLToken_Float2x2, + HLSLToken_Float3x3, + HLSLToken_Float4x4, + + // for Nvidia/Adreno + HLSLToken_Halfio, + HLSLToken_Half2io, + HLSLToken_Half3io, + HLSLToken_Half4io, + + // for Android w/o fp16 storage + HLSLToken_Halfst, + HLSLToken_Half2st, + HLSLToken_Half3st, + HLSLToken_Half4st, + + HLSLToken_Half, + HLSLToken_Half2, + HLSLToken_Half3, + HLSLToken_Half4, + HLSLToken_Half2x2, + HLSLToken_Half3x3, + HLSLToken_Half4x4, + + HLSLToken_Double, + HLSLToken_Double2, + HLSLToken_Double3, + HLSLToken_Double4, + HLSLToken_Double2x2, + HLSLToken_Double3x3, + HLSLToken_Double4x4, + + HLSLToken_Bool, + HLSLToken_Bool2, + HLSLToken_Bool3, + HLSLToken_Bool4, + + HLSLToken_Int, + HLSLToken_Int2, + HLSLToken_Int3, + HLSLToken_Int4, + + HLSLToken_Uint, + HLSLToken_Uint2, + HLSLToken_Uint3, + HLSLToken_Uint4, + + HLSLToken_Short, + HLSLToken_Short2, + HLSLToken_Short3, + HLSLToken_Short4, + + HLSLToken_Ushort, + HLSLToken_Ushort2, + HLSLToken_Ushort3, + HLSLToken_Ushort4, + + HLSLToken_Long, + HLSLToken_Long2, + HLSLToken_Long3, + HLSLToken_Long4, + + HLSLToken_Ulong, + HLSLToken_Ulong2, + HLSLToken_Ulong3, + HLSLToken_Ulong4, + + // TODO: u/char + HLSLToken_Texture2D, + HLSLToken_Texture3D, + HLSLToken_TextureCube, + HLSLToken_Texture2DArray, + HLSLToken_TextureCubeArray, + 
HLSLToken_Texture2DMS, + + HLSLToken_Depth2D, + HLSLToken_Depth2DArray, + HLSLToken_DepthCube, + // TODO: other depth types + + HLSLToken_RWTexture2D, + + HLSLToken_SamplerState, + HLSLToken_SamplerComparisonState, + + // Reserved words. + HLSLToken_If, + HLSLToken_Else, + HLSLToken_For, + HLSLToken_While, + HLSLToken_Break, + HLSLToken_True, + HLSLToken_False, + HLSLToken_Void, + HLSLToken_Struct, + + // dx9 + HLSLToken_CBuffer, + HLSLToken_TBuffer, + + // dx10 templated types (TODO: hook to parser and generator) + HLSLToken_ConstantBuffer, + HLSLToken_StructuredBuffer, + HLSLToken_RWStructuredBuffer, + // HLSLToken_AppendStructuredBuffer, + // HLSLToken_ConsumeStructuredBuffer, + HLSLToken_ByteAddressBuffer, + HLSLToken_RWByteAddressBuffer, + // RWTexture, and other types + + HLSLToken_Register, + HLSLToken_Return, + HLSLToken_Continue, + HLSLToken_Discard, + + HLSLToken_Const, + HLSLToken_Static, + HLSLToken_Inline, + + // Input modifiers. + HLSLToken_Uniform, + HLSLToken_In, + HLSLToken_Out, + HLSLToken_InOut, + + // Effect keywords. + //HLSLToken_SamplerStateBlock, + //HLSLToken_Technique, + //HLSLToken_Pass, + + // These all start with # + HLSLToken_Include, + // HLSLToken_Pragma + // HLSLToken_Line + + //=================== + // End of strings that have to match in _reservedWords in .cpp + + // Multi-character symbols. + HLSLToken_LessEqual, + HLSLToken_GreaterEqual, + HLSLToken_EqualEqual, + HLSLToken_NotEqual, + HLSLToken_PlusPlus, + HLSLToken_MinusMinus, + HLSLToken_PlusEqual, + HLSLToken_MinusEqual, + HLSLToken_TimesEqual, + HLSLToken_DivideEqual, + HLSLToken_LogicalAnd, // && + HLSLToken_LogicalOr, // || + + // Other token types. + HLSLToken_FloatLiteral, + HLSLToken_HalfLiteral, + HLSLToken_IntLiteral, + + HLSLToken_Identifier, + HLSLToken_Comment, // Alec added this + + HLSLToken_EndOfStream, +}; + +class HLSLTokenizer { +public: + /// Maximum string length of an identifier. 
+ constexpr static int s_maxIdentifier = 255 + 1; + constexpr static int s_maxComment = 4096; + + /// The file name is only used for error reporting. + HLSLTokenizer(const char* fileName, const char* buffer, size_t length); + + /// Advances to the next token in the stream. + void Next(); + + /// Returns the current token in the stream. + int GetToken() const; + + /// Returns the number of the current token. + float GetFloat() const; + int GetInt() const; + + /// Returns the identifier for the current token. + const char* GetIdentifier() const; + + /// Returns the comment text for the current token. + const char* GetComment() const; + + /// Returns the line number where the current token began. + int GetLineNumber() const; + + /// Returns the file name where the current token began. + const char* GetFileName() const; + + /// Gets a human readable text description of the current token. + void GetTokenName(char buffer[s_maxIdentifier]) const; + + /// Reports an error using printf style formatting. The current line number + /// is included. Only the first error reported will be output. + void Error(const char* format, ...) M4_PRINTF_ATTR(2, 3); + + /// Gets a human readable text description of the specified token. 
+ static void GetTokenName(int token, char buffer[s_maxIdentifier]); + + /// Tokenizer will default to strip double-slash comments, but this tries to preserve them if true + void SetKeepComments(bool enable) { m_keepComments = enable; } + +private: + bool SkipWhitespace(); + bool SkipComment(); + bool SkipPragmaDirective(); + bool SkipInclude(); + + bool ScanNumber(); + bool ScanLineDirective(); + +private: + const char* m_fileName = nullptr; + const char* m_buffer = nullptr; + const char* m_bufferEnd = nullptr; + int m_lineNumber = 0; + bool m_error = false; + + int m_token = 0; + float m_fValue = 0.0f; + int m_iValue = 0; + char m_identifier[s_maxIdentifier] = {}; + char m_comment[s_maxComment] = {}; + char m_lineDirectiveFileName[s_maxIdentifier] = {}; + int m_tokenLineNumber = 0; + bool m_keepComments = false; +}; + +} //namespace M4 diff --git a/hlslparser/src/HLSLTree.cpp b/hlslparser/src/HLSLTree.cpp new file mode 100644 index 00000000..4332b6b5 --- /dev/null +++ b/hlslparser/src/HLSLTree.cpp @@ -0,0 +1,1877 @@ +#include "HLSLTree.h" + +#include "Engine.h" + +namespace M4 { + +// TODO: split helper calls out to new .h, so can include that +// over to HLSLParser.cpp +extern bool IsSamplerType(const HLSLType &type); + +extern bool IsScalarType(HLSLBaseType type); +extern bool IsIntegerType(HLSLBaseType type); +extern bool IsFloatingType(HLSLBaseType type); + +extern int32_t GetVectorDimension(HLSLBaseType type); + +HLSLTree::HLSLTree(Allocator *allocator) : m_allocator(allocator), m_stringPool(allocator) +{ + m_firstPage = m_allocator->New(); + m_firstPage->next = NULL; + + m_currentPage = m_firstPage; + m_currentPageOffset = 0; + + m_root = AddNode(NULL, 1); +} + +HLSLTree::~HLSLTree() +{ + NodePage *page = m_firstPage; + while (page != NULL) { + NodePage *next = page->next; + m_allocator->Delete(page); + page = next; + } +} + +void HLSLTree::AllocatePage() +{ + NodePage *newPage = m_allocator->New(); + newPage->next = NULL; + m_currentPage->next = newPage; + 
m_currentPageOffset = 0; + m_currentPage = newPage; +} + +const char *HLSLTree::AddString(const char *string) +{ + return m_stringPool.AddString(string); +} + +const char *HLSLTree::AddStringFormat(const char *format, ...) +{ + va_list args; + va_start(args, format); + const char *string = m_stringPool.AddStringFormatList(format, args); + va_end(args); + return string; +} + +bool HLSLTree::GetContainsString(const char *string) const +{ + return m_stringPool.GetContainsString(string); +} + +HLSLRoot *HLSLTree::GetRoot() const +{ + return m_root; +} + +void *HLSLTree::AllocateMemory(size_t size) +{ + if (m_currentPageOffset + size > s_nodePageSize) { + AllocatePage(); + } + void *buffer = m_currentPage->buffer + m_currentPageOffset; + m_currentPageOffset += size; + return buffer; +} + +// @@ This doesn't do any parameter matching. Simply returns the first function with that name. +HLSLFunction *HLSLTree::FindFunction(const char *name) +{ + HLSLStatement *statement = m_root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Function) { + HLSLFunction *function = (HLSLFunction *)statement; + if (String_Equal(name, function->name)) { + return function; + } + } + + statement = statement->nextStatement; + } + + return NULL; +} + +HLSLDeclaration *HLSLTree::FindGlobalDeclaration(const char *name, HLSLBuffer **buffer_out /*=NULL*/) +{ + HLSLStatement *statement = m_root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration *declaration = (HLSLDeclaration *)statement; + if (String_Equal(name, declaration->name)) { + if (buffer_out) *buffer_out = NULL; + return declaration; + } + } + else if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer *buffer = (HLSLBuffer *)statement; + + // This searches the fields to find the buffer, + // since cbuffer/tbuffer represent globals. 
+ if (buffer->IsGlobalFields()) { + HLSLDeclaration *field = buffer->field; + while (field != NULL) { + ASSERT(field->nodeType == HLSLNodeType_Declaration); + if (String_Equal(name, field->name)) { + if (buffer_out) *buffer_out = buffer; + return field; + } + field = (HLSLDeclaration *)field->nextStatement; + } + } + else { + if (String_Equal(name, buffer->name)) { + if (buffer_out) *buffer_out = buffer; + return NULL; + } + + /* This isn't same type... + + // Note: should pass buffers, but buffer/texture + // and cbuffer fields can be global to entire shader. + + // find struct first + const HLSLStruct* bufferStruct = buffer->bufferStruct; + + // new search those for the fields + HLSLStructField* field = bufferStruct->field; + while (field != NULL) + { + ASSERT(field->nodeType == HLSLNodeType_Declaration); + if (String_Equal(name, field->name)) + { + if (buffer_out) *buffer_out = buffer; + return field; + } + field = field->nextField; + } + */ + } + } + + statement = statement->nextStatement; + } + + if (buffer_out) *buffer_out = NULL; + return NULL; +} + +HLSLStruct *HLSLTree::FindGlobalStruct(const char *name) +{ + HLSLStatement *statement = m_root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Struct) { + HLSLStruct *declaration = (HLSLStruct *)statement; + if (String_Equal(name, declaration->name)) { + return declaration; + } + } + + statement = statement->nextStatement; + } + + return NULL; +} + +/* FX files +HLSLTechnique * HLSLTree::FindTechnique(const char * name) +{ + HLSLStatement * statement = m_root->statement; + while (statement != NULL) + { + if (statement->nodeType == HLSLNodeType_Technique) + { + HLSLTechnique * technique = (HLSLTechnique *)statement; + if (String_Equal(name, technique->name)) + { + return technique; + } + } + + statement = statement->nextStatement; + } + + return NULL; +} + +HLSLPipeline * HLSLTree::FindFirstPipeline() +{ + return FindNextPipeline(NULL); +} + +HLSLPipeline * 
HLSLTree::FindNextPipeline(HLSLPipeline * current) +{ + HLSLStatement * statement = current ? current : m_root->statement; + while (statement != NULL) + { + if (statement->nodeType == HLSLNodeType_Pipeline) + { + return (HLSLPipeline *)statement; + } + + statement = statement->nextStatement; + } + + return NULL; +} + +HLSLPipeline * HLSLTree::FindPipeline(const char * name) +{ + HLSLStatement * statement = m_root->statement; + while (statement != NULL) + { + if (statement->nodeType == HLSLNodeType_Pipeline) + { + HLSLPipeline * pipeline = (HLSLPipeline *)statement; + if (String_Equal(name, pipeline->name)) + { + return pipeline; + } + } + + statement = statement->nextStatement; + } + + return NULL; +} +*/ + +HLSLBuffer *HLSLTree::FindBuffer(const char *name) +{ + HLSLStatement *statement = m_root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer *buffer = (HLSLBuffer *)statement; + if (String_Equal(name, buffer->name)) { + return buffer; + } + } + + statement = statement->nextStatement; + } + + return NULL; +} + +bool HLSLTree::GetExpressionValue(HLSLExpression *expression, int &value) +{ + ASSERT(expression != NULL); + + // Expression must be constant. + if ((expression->expressionType.flags & HLSLTypeFlag_Const) == 0) { + return false; + } + + // We are expecting an integer scalar. + // TODO: Add support for type conversion from uint scalar types. 
+ if (expression->expressionType.baseType != HLSLBaseType_Long && + expression->expressionType.baseType != HLSLBaseType_Short && + expression->expressionType.baseType != HLSLBaseType_Int && + expression->expressionType.baseType != HLSLBaseType_Bool) { + return false; + } + + if (expression->expressionType.array) { + return false; + } + + if (expression->nodeType == HLSLNodeType_BinaryExpression) { + HLSLBinaryExpression *binaryExpression = (HLSLBinaryExpression *)expression; + + int value1, value2; + if (!GetExpressionValue(binaryExpression->expression1, value1) || + !GetExpressionValue(binaryExpression->expression2, value2)) { + return false; + } + + switch (binaryExpression->binaryOp) { + case HLSLBinaryOp_And: + value = value1 && value2; + return true; + case HLSLBinaryOp_Or: + value = value1 || value2; + return true; + case HLSLBinaryOp_Add: + value = value1 + value2; + return true; + case HLSLBinaryOp_Sub: + value = value1 - value2; + return true; + case HLSLBinaryOp_Mul: + value = value1 * value2; + return true; + case HLSLBinaryOp_Div: + if (value2 == 0) return false; // a constant division by zero cannot be folded; report "no value" instead of dividing + value = value1 / value2; + return true; + case HLSLBinaryOp_Less: + value = value1 < value2; + return true; + case HLSLBinaryOp_Greater: + value = value1 > value2; + return true; + case HLSLBinaryOp_LessEqual: + value = value1 <= value2; + return true; + case HLSLBinaryOp_GreaterEqual: + value = value1 >= value2; + return true; + case HLSLBinaryOp_Equal: + value = value1 == value2; + return true; + case HLSLBinaryOp_NotEqual: + value = value1 != value2; + return true; + case HLSLBinaryOp_BitAnd: + value = value1 & value2; + return true; + case HLSLBinaryOp_BitOr: + value = value1 | value2; + return true; + case HLSLBinaryOp_BitXor: + value = value1 ^ value2; + return true; + case HLSLBinaryOp_Assign: + case HLSLBinaryOp_AddAssign: + case HLSLBinaryOp_SubAssign: + case HLSLBinaryOp_MulAssign: + case HLSLBinaryOp_DivAssign: + // IC: These are not valid on non-constant expressions and should fail earlier when querying 
expression value. + return false; + } + } + else if (expression->nodeType == HLSLNodeType_UnaryExpression) { + HLSLUnaryExpression *unaryExpression = (HLSLUnaryExpression *)expression; + + if (!GetExpressionValue(unaryExpression->expression, value)) { + return false; + } + + switch (unaryExpression->unaryOp) { + case HLSLUnaryOp_Negative: + value = -value; + return true; + case HLSLUnaryOp_Positive: + // nop. + return true; + case HLSLUnaryOp_Not: + value = !value; + return true; + case HLSLUnaryOp_BitNot: + value = ~value; + return true; + case HLSLUnaryOp_PostDecrement: + case HLSLUnaryOp_PostIncrement: + case HLSLUnaryOp_PreDecrement: + case HLSLUnaryOp_PreIncrement: + // IC: These are not valid on non-constant expressions and should fail earlier when querying expression value. + return false; + } + } + else if (expression->nodeType == HLSLNodeType_IdentifierExpression) { + HLSLIdentifierExpression *identifier = (HLSLIdentifierExpression *)expression; + + HLSLDeclaration *declaration = FindGlobalDeclaration(identifier->name); + if (declaration == NULL) { + return false; + } + if ((declaration->type.flags & HLSLTypeFlag_Const) == 0) { + return false; + } + + return GetExpressionValue(declaration->assignment, value); + } + else if (expression->nodeType == HLSLNodeType_LiteralExpression) { + HLSLLiteralExpression *literal = (HLSLLiteralExpression *)expression; + + if (literal->expressionType.baseType == HLSLBaseType_Int) + value = literal->iValue; + else if (literal->expressionType.baseType == HLSLBaseType_Long) + value = literal->iValue; // precision loss to Int + else if (literal->expressionType.baseType == HLSLBaseType_Short) + value = literal->iValue; + + else if (literal->expressionType.baseType == HLSLBaseType_Bool) + value = (int)literal->bValue; + else + return false; + + return true; + } + + return false; +} + +// TODO: Nothing calling this? 
+bool HLSLTree::NeedsFunction(const char *name) +{ + // Early out + if (!GetContainsString(name)) + return false; + + struct NeedsFunctionVisitor : HLSLTreeVisitor { + const char *name; + bool result; + + virtual ~NeedsFunctionVisitor() {} + + virtual void VisitTopLevelStatement(HLSLStatement *node) + { + if (!node->hidden) + HLSLTreeVisitor::VisitTopLevelStatement(node); + } + + virtual void VisitFunctionCall(HLSLFunctionCall *node) + { + result = result || String_Equal(name, node->function->name); + + HLSLTreeVisitor::VisitFunctionCall(node); + } + }; + + NeedsFunctionVisitor visitor; + visitor.name = name; + visitor.result = false; + + visitor.VisitRoot(m_root); + + return visitor.result; +} + +// Returns dimension, 0 if invalid. +int HLSLTree::GetExpressionValue(HLSLExpression *expression, float values[4]) +{ + ASSERT(expression != NULL); + + // Expression must be constant. + if ((expression->expressionType.flags & HLSLTypeFlag_Const) == 0) { + return 0; + } + + HLSLBaseType type = expression->expressionType.baseType; + + if (IsIntegerType(type)) { + if (IsScalarType(type)) { + int intValue; + if (GetExpressionValue(expression, intValue)) { + for (int i = 0; i < 4; i++) values[i] = (float)intValue; // @@ Warn if conversion is not exact. + return 1; + } + } + + return 0; + } + // this skips other int types not handled above + if (!IsFloatingType(type)) + return 0; + + // @@ Not supported yet, but we may need it? + if (expression->expressionType.array) { + return false; + } + + int dim = GetVectorDimension(type); + + if (expression->nodeType == HLSLNodeType_BinaryExpression) { + HLSLBinaryExpression *binaryExpression = (HLSLBinaryExpression *)expression; + + float values1[4], values2[4]; + int dim1 = GetExpressionValue(binaryExpression->expression1, values1); + int dim2 = GetExpressionValue(binaryExpression->expression2, values2); + + if (dim1 == 0 || dim2 == 0) { + return 0; + } + + if (dim1 != dim2) { + // Broadcast scalar to vector size. 
+ if (dim1 == 1) { + for (int i = 1; i < dim2; i++) values1[i] = values1[0]; + dim1 = dim2; + } + else if (dim2 == 1) { + for (int i = 1; i < dim1; i++) values2[i] = values2[0]; + dim2 = dim1; + } + else { + return 0; + } + } + ASSERT(dim == dim1); + + switch (binaryExpression->binaryOp) { + case HLSLBinaryOp_Add: + for (int i = 0; i < dim; i++) values[i] = values1[i] + values2[i]; + return dim; + case HLSLBinaryOp_Sub: + for (int i = 0; i < dim; i++) values[i] = values1[i] - values2[i]; + return dim; + case HLSLBinaryOp_Mul: + for (int i = 0; i < dim; i++) values[i] = values1[i] * values2[i]; + return dim; + case HLSLBinaryOp_Div: + for (int i = 0; i < dim; i++) values[i] = values1[i] / values2[i]; + return dim; + default: + return 0; + } + } + else if (expression->nodeType == HLSLNodeType_UnaryExpression) { + HLSLUnaryExpression *unaryExpression = (HLSLUnaryExpression *)expression; + + int dim1 = GetExpressionValue(unaryExpression->expression, values); + if (dim1 == 0) { + return 0; + } + ASSERT(dim == dim1); + + switch (unaryExpression->unaryOp) { + case HLSLUnaryOp_Negative: + for (int i = 0; i < dim; i++) values[i] = -values[i]; + return dim; + case HLSLUnaryOp_Positive: + // nop. 
+ return dim; + default: + return 0; + } + } + else if (expression->nodeType == HLSLNodeType_ConstructorExpression) { + HLSLConstructorExpression *constructor = (HLSLConstructorExpression *)expression; + + int idx = 0; + HLSLExpression *arg = constructor->argument; + while (arg != NULL) { + float tmp[4]; + int n = GetExpressionValue(arg, tmp); + for (int i = 0; i < n; i++) values[idx + i] = tmp[i]; + idx += n; + + arg = arg->nextExpression; + } + ASSERT(dim == idx); + + return dim; + } + else if (expression->nodeType == HLSLNodeType_IdentifierExpression) { + HLSLIdentifierExpression *identifier = (HLSLIdentifierExpression *)expression; + + HLSLDeclaration *declaration = FindGlobalDeclaration(identifier->name); + if (declaration == NULL) { + return 0; + } + if ((declaration->type.flags & HLSLTypeFlag_Const) == 0) { + return 0; + } + + return GetExpressionValue(declaration->assignment, values); + } + else if (expression->nodeType == HLSLNodeType_LiteralExpression) { + HLSLLiteralExpression *literal = (HLSLLiteralExpression *)expression; + + if (literal->expressionType.baseType == HLSLBaseType_Float) + values[0] = literal->fValue; + else if (literal->expressionType.baseType == HLSLBaseType_Half) + values[0] = literal->fValue; + else if (literal->expressionType.baseType == HLSLBaseType_Double) + values[0] = literal->fValue; // TODO: need more precision + + else if (literal->expressionType.baseType == HLSLBaseType_Bool) + values[0] = literal->bValue; + + // TODO: add uint types, fix precision of short/long/double/half + // signed ints + else if (literal->expressionType.baseType == HLSLBaseType_Int) + values[0] = (float)literal->iValue; // @@ Warn if conversion is not exact. 
+ else if (literal->expressionType.baseType == HLSLBaseType_Short) + values[0] = (float)literal->iValue; + else if (literal->expressionType.baseType == HLSLBaseType_Long) + values[0] = (float)literal->iValue; + else + return 0; + + return 1; + } + + return 0; +} + +void HLSLTreeVisitor::VisitType(HLSLType &type) +{ +} + +void HLSLTreeVisitor::VisitRoot(HLSLRoot *root) +{ + HLSLStatement *statement = root->statement; + while (statement != NULL) { + VisitTopLevelStatement(statement); + statement = statement->nextStatement; + } +} + +void HLSLTreeVisitor::VisitTopLevelStatement(HLSLStatement *node) +{ + if (node->nodeType == HLSLNodeType_Declaration) { + VisitDeclaration((HLSLDeclaration *)node); + } + else if (node->nodeType == HLSLNodeType_Struct) { + VisitStruct((HLSLStruct *)node); + } + else if (node->nodeType == HLSLNodeType_Buffer) { + VisitBuffer((HLSLBuffer *)node); + } + else if (node->nodeType == HLSLNodeType_Function) { + VisitFunction((HLSLFunction *)node); + } + else if (node->nodeType == HLSLNodeType_Comment) { + VisitComment((HLSLComment *)node); + } + + // FX file stuff + // else if (node->nodeType == HLSLNodeType_Technique) { + // VisitTechnique((HLSLTechnique *)node); + // } + // else if (node->nodeType == HLSLNodeType_Pipeline) { + // VisitPipeline((HLSLPipeline *)node); + // } + + else { + ASSERT(false); + } +} + +void HLSLTreeVisitor::VisitStatements(HLSLStatement *statement) +{ + while (statement != NULL) { + VisitStatement(statement); + statement = statement->nextStatement; + } +} + +void HLSLTreeVisitor::VisitStatement(HLSLStatement *node) +{ + // Function statements + if (node->nodeType == HLSLNodeType_Declaration) { + VisitDeclaration((HLSLDeclaration *)node); + } + else if (node->nodeType == HLSLNodeType_ExpressionStatement) { + VisitExpressionStatement((HLSLExpressionStatement *)node); + } + else if (node->nodeType == HLSLNodeType_ReturnStatement) { + VisitReturnStatement((HLSLReturnStatement *)node); + } + else if (node->nodeType == 
HLSLNodeType_DiscardStatement) { + VisitDiscardStatement((HLSLDiscardStatement *)node); + } + else if (node->nodeType == HLSLNodeType_BreakStatement) { + VisitBreakStatement((HLSLBreakStatement *)node); + } + else if (node->nodeType == HLSLNodeType_ContinueStatement) { + VisitContinueStatement((HLSLContinueStatement *)node); + } + else if (node->nodeType == HLSLNodeType_IfStatement) { + VisitIfStatement((HLSLIfStatement *)node); + } + else if (node->nodeType == HLSLNodeType_ForStatement) { + VisitForStatement((HLSLForStatement *)node); + } + else if (node->nodeType == HLSLNodeType_BlockStatement) { + VisitBlockStatement((HLSLBlockStatement *)node); + } + else if (node->nodeType == HLSLNodeType_Comment) { + VisitComment((HLSLComment *)node); + } + else { + ASSERT(false); + } +} + +void HLSLTreeVisitor::VisitDeclaration(HLSLDeclaration *node) +{ + VisitType(node->type); + /*do { + VisitExpression(node->assignment); + node = node->nextDeclaration; + } while (node);*/ + if (node->assignment != NULL) { + VisitExpression(node->assignment); + } + if (node->nextDeclaration != NULL) { + VisitDeclaration(node->nextDeclaration); + } +} + +void HLSLTreeVisitor::VisitStruct(HLSLStruct *node) +{ + HLSLStructField *field = node->field; + while (field != NULL) { + VisitStructField(field); + field = field->nextField; + } +} + +void HLSLTreeVisitor::VisitStructField(HLSLStructField *node) +{ + // This can use a constant in an array field that must be resolved + if (node->type.array) { + VisitExpression(node->type.arraySize); + } + + VisitType(node->type); +} + +void HLSLTreeVisitor::VisitBuffer(HLSLBuffer *node) +{ + if (node->IsGlobalFields()) { + HLSLDeclaration *field = node->field; + while (field != NULL) { + ASSERT(field->nodeType == HLSLNodeType_Declaration); + VisitDeclaration(field); + ASSERT(field->nextDeclaration == NULL); + field = (HLSLDeclaration *)field->nextStatement; + } + } + else { + VisitStruct(node->bufferStruct); + } +} + +/*void 
HLSLTreeVisitor::VisitBufferField(HLSLBufferField * node) +{ + VisitType(node->type); +}*/ + +void HLSLTreeVisitor::VisitFunction(HLSLFunction *node) +{ + VisitType(node->returnType); + + HLSLArgument *argument = node->argument; + while (argument != NULL) { + VisitArgument(argument); + argument = argument->nextArgument; + } + + VisitStatements(node->statement); +} + +void HLSLTreeVisitor::VisitArgument(HLSLArgument *node) +{ + VisitType(node->type); + if (node->defaultValue != NULL) { + VisitExpression(node->defaultValue); + } +} + +void HLSLTreeVisitor::VisitExpressionStatement(HLSLExpressionStatement *node) +{ + VisitExpression(node->expression); +} + +void HLSLTreeVisitor::VisitExpression(HLSLExpression *node) +{ + VisitType(node->expressionType); + + if (node->nodeType == HLSLNodeType_UnaryExpression) { + VisitUnaryExpression((HLSLUnaryExpression *)node); + } + else if (node->nodeType == HLSLNodeType_BinaryExpression) { + VisitBinaryExpression((HLSLBinaryExpression *)node); + } + else if (node->nodeType == HLSLNodeType_ConditionalExpression) { + VisitConditionalExpression((HLSLConditionalExpression *)node); + } + else if (node->nodeType == HLSLNodeType_CastingExpression) { + VisitCastingExpression((HLSLCastingExpression *)node); + } + else if (node->nodeType == HLSLNodeType_LiteralExpression) { + VisitLiteralExpression((HLSLLiteralExpression *)node); + } + else if (node->nodeType == HLSLNodeType_IdentifierExpression) { + VisitIdentifierExpression((HLSLIdentifierExpression *)node); + } + else if (node->nodeType == HLSLNodeType_ConstructorExpression) { + VisitConstructorExpression((HLSLConstructorExpression *)node); + } + else if (node->nodeType == HLSLNodeType_MemberAccess) { + VisitMemberAccess((HLSLMemberAccess *)node); + } + else if (node->nodeType == HLSLNodeType_ArrayAccess) { + VisitArrayAccess((HLSLArrayAccess *)node); + } + else if (node->nodeType == HLSLNodeType_FunctionCall) { + VisitFunctionCall((HLSLFunctionCall *)node); + } + else if (node->nodeType 
== HLSLNodeType_MemberFunctionCall) { + HLSLMemberFunctionCall *memberFunctionCall = (HLSLMemberFunctionCall *)node; + VisitIdentifierExpression((HLSLIdentifierExpression *)memberFunctionCall->memberIdentifier); // const_cast + VisitFunctionCall(memberFunctionCall); + } + // Acoget-TODO: This was missing. Did adding it break anything? + // else if (node->nodeType == HLSLNodeType_SamplerState) { + // VisitSamplerState((HLSLSamplerState *)node); + // } + else { + ASSERT(false); + } +} + +void HLSLTreeVisitor::VisitReturnStatement(HLSLReturnStatement *node) +{ + VisitExpression(node->expression); +} + +void HLSLTreeVisitor::VisitDiscardStatement(HLSLDiscardStatement *node) {} +void HLSLTreeVisitor::VisitBreakStatement(HLSLBreakStatement *node) {} +void HLSLTreeVisitor::VisitContinueStatement(HLSLContinueStatement *node) {} + +void HLSLTreeVisitor::VisitIfStatement(HLSLIfStatement *node) +{ + VisitExpression(node->condition); + VisitStatements(node->statement); + if (node->elseStatement) { + VisitStatements(node->elseStatement); + } +} + +void HLSLTreeVisitor::VisitForStatement(HLSLForStatement *node) +{ + if (node->initialization) { + VisitDeclaration(node->initialization); + } + if (node->condition) { + VisitExpression(node->condition); + } + if (node->increment) { + VisitExpression(node->increment); + } + VisitStatements(node->statement); +} + +void HLSLTreeVisitor::VisitBlockStatement(HLSLBlockStatement *node) +{ + VisitStatements(node->statement); +} + +void HLSLTreeVisitor::VisitUnaryExpression(HLSLUnaryExpression *node) +{ + VisitExpression(node->expression); +} + +void HLSLTreeVisitor::VisitBinaryExpression(HLSLBinaryExpression *node) +{ + VisitExpression(node->expression1); + VisitExpression(node->expression2); +} + +void HLSLTreeVisitor::VisitConditionalExpression(HLSLConditionalExpression *node) +{ + VisitExpression(node->condition); + VisitExpression(node->falseExpression); + VisitExpression(node->trueExpression); +} + +void 
HLSLTreeVisitor::VisitCastingExpression(HLSLCastingExpression *node) +{ + VisitType(node->type); + VisitExpression(node->expression); +} + +void HLSLTreeVisitor::VisitLiteralExpression(HLSLLiteralExpression *node) {} +void HLSLTreeVisitor::VisitIdentifierExpression(HLSLIdentifierExpression *node) {} + +void HLSLTreeVisitor::VisitConstructorExpression(HLSLConstructorExpression *node) +{ + HLSLExpression *argument = node->argument; + while (argument != NULL) { + VisitExpression(argument); + argument = argument->nextExpression; + } +} + +void HLSLTreeVisitor::VisitMemberAccess(HLSLMemberAccess *node) +{ + VisitExpression(node->object); +} + +void HLSLTreeVisitor::VisitArrayAccess(HLSLArrayAccess *node) +{ + VisitExpression(node->array); + VisitExpression(node->index); +} + +void HLSLTreeVisitor::VisitFunctionCall(HLSLFunctionCall *node) +{ + HLSLExpression *argument = node->argument; + while (argument != NULL) { + VisitExpression(argument); + argument = argument->nextExpression; + } +} + +/* +void HLSLTreeVisitor::VisitStateAssignment(HLSLStateAssignment * node) {} + +void HLSLTreeVisitor::VisitSamplerState(HLSLSamplerState * node) +{ + HLSLStateAssignment * stateAssignment = node->stateAssignments; + while (stateAssignment != NULL) { + VisitStateAssignment(stateAssignment); + stateAssignment = stateAssignment->nextStateAssignment; + } +} + +void HLSLTreeVisitor::VisitPass(HLSLPass * node) +{ + HLSLStateAssignment * stateAssignment = node->stateAssignments; + while (stateAssignment != NULL) { + VisitStateAssignment(stateAssignment); + stateAssignment = stateAssignment->nextStateAssignment; + } +} + +void HLSLTreeVisitor::VisitTechnique(HLSLTechnique * node) +{ + HLSLPass * pass = node->passes; + while (pass != NULL) { + VisitPass(pass); + pass = pass->nextPass; + } +} + + +void HLSLTreeVisitor::VisitPipeline(HLSLPipeline * node) +{ + // This is for FX files +} +*/ + +void HLSLTreeVisitor::VisitComment(HLSLComment *node) +{ +} + +void 
HLSLTreeVisitor::VisitFunctions(HLSLRoot *root) +{ + HLSLStatement *statement = root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Function) { + VisitFunction((HLSLFunction *)statement); + } + + statement = statement->nextStatement; + } +} + +void HLSLTreeVisitor::VisitParameters(HLSLRoot *root) +{ + HLSLStatement *statement = root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Declaration) { + VisitDeclaration((HLSLDeclaration *)statement); + } + + statement = statement->nextStatement; + } +} + +class ResetHiddenFlagVisitor : public HLSLTreeVisitor { +public: + virtual ~ResetHiddenFlagVisitor() {} + + virtual void VisitTopLevelStatement(HLSLStatement *statement) override + { + statement->hidden = true; + + if (statement->nodeType == HLSLNodeType_Buffer) { + VisitBuffer((HLSLBuffer *)statement); + } + } + + // Hide buffer fields. + virtual void VisitDeclaration(HLSLDeclaration *node) override + { + // node->hidden = true; + } + + virtual void VisitComment(HLSLComment *node) override + { + node->hidden = true; + } + + virtual void VisitArgument(HLSLArgument *node) override + { + node->hidden = false; // Arguments are visible by default. 
+ } +}; + +class MarkVisibleStatementsVisitor : public HLSLTreeVisitor { +public: + HLSLTree *tree; + MarkVisibleStatementsVisitor(HLSLTree *tree) : tree(tree) {} + + virtual ~MarkVisibleStatementsVisitor() {} + + virtual void VisitComment(HLSLComment *node) override + { + node->hidden = false; + } + + virtual void VisitFunction(HLSLFunction *node) override + { + node->hidden = false; + HLSLTreeVisitor::VisitFunction(node); + + if (node->forward) + VisitFunction(node->forward); + } + + virtual void VisitFunctionCall(HLSLFunctionCall *node) override + { + HLSLTreeVisitor::VisitFunctionCall(node); + + if (node->function->hidden) { + VisitFunction(const_cast(node->function)); + } + } + + virtual void VisitIdentifierExpression(HLSLIdentifierExpression *node) override + { + HLSLTreeVisitor::VisitIdentifierExpression(node); + + if (node->global) { + HLSLBuffer *buffer = NULL; + HLSLDeclaration *declaration = tree->FindGlobalDeclaration(node->name, &buffer); + + if (declaration != NULL && declaration->hidden) { + declaration->hidden = false; + VisitDeclaration(declaration); + } + if (buffer != NULL && buffer->hidden) { + buffer->hidden = false; + } + } + } + + virtual void VisitType(HLSLType &type) override + { + // if (type.array) + // { + // // Alec added this to try to handle structs with array constants, but + // // it causes other issues. VisitStructField calls VisitType. + // + // // handle sized or unsized array, since sized may use constant + // // VisitExpression(type.arraySize); + // int bp = 0; + // bp = bp; + // } + // else + if (type.baseType == HLSLBaseType_UserDefined) { + HLSLStruct *globalStruct = tree->FindGlobalStruct(type.typeName); + if (globalStruct != NULL) { + globalStruct->hidden = false; + VisitStruct(globalStruct); + } + } + } +}; + +void PruneTree(HLSLTree *tree, const char *entryName0, const char *entryName1 /*=NULL*/) +{ + HLSLRoot *root = tree->GetRoot(); + + // Reset all flags. 
+ ResetHiddenFlagVisitor reset; + reset.VisitRoot(root); + + // Mark all the statements necessary for these entrypoints. + HLSLFunction *entry = tree->FindFunction(entryName0); + if (entry != NULL) { + MarkVisibleStatementsVisitor mark(tree); + mark.VisitFunction(entry); + } + + if (entryName1 != NULL) { + entry = tree->FindFunction(entryName1); + if (entry != NULL) { + MarkVisibleStatementsVisitor mark(tree); + mark.VisitFunction(entry); + } + } + + // Mark buffers visible, if any of their fields is visible. + HLSLStatement *statement = root->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer *buffer = (HLSLBuffer *)statement; + + if (buffer->IsGlobalFields()) { + // mark buffer visible if any of its fields are used + HLSLDeclaration *field = buffer->field; + while (field != NULL) { + ASSERT(field->nodeType == HLSLNodeType_Declaration); + if (!field->hidden) { + buffer->hidden = false; + break; + } + field = (HLSLDeclaration *)field->nextStatement; + } + } + else { + // TODO: these load from a struct so may just need + // to somehow mark this if present. + + /* all struct fields are hidden = false, so this doesn't work + // mark buffer visible if any struct fields are used + HLSLStructField* field = buffer->bufferStruct->field; + while (field != NULL) + { + ASSERT(field->nodeType == HLSLNodeType_StructField); + if (!field->hidden) + { + buffer->hidden = false; + break; + } + field = (HLSLStructField*)field->nextField; + } + */ + } + } + + statement = statement->nextStatement; + } +} + +void SortTree(HLSLTree *tree) +{ + // Stable sort so that statements are in this order: + // const scalars for arrays, structs, declarations, functions, techniques. + // but their relative order is preserved. 
+ + HLSLRoot *root = tree->GetRoot(); + + HLSLStatement *constScalarDeclarations = NULL; + HLSLStatement *lastConstScalarDeclaration = NULL; + + HLSLStatement *structs = NULL; + HLSLStatement *lastStruct = NULL; + + HLSLStatement *constDeclarations = NULL; + HLSLStatement *lastConstDeclaration = NULL; + + HLSLStatement *declarations = NULL; + HLSLStatement *lastDeclaration = NULL; + + HLSLStatement *functions = NULL; + HLSLStatement *lastFunction = NULL; + + HLSLStatement *other = NULL; + HLSLStatement *lastOther = NULL; + +#define AppendToList(statement, list, listLast) \ + if (list == NULL) list = statement; \ + if (listLast != NULL) listLast->nextStatement = statement; \ + listLast = statement; + + HLSLStatement *statement = root->statement; + while (statement != NULL) { + HLSLStatement *nextStatement = statement->nextStatement; + statement->nextStatement = NULL; + + if (statement->nodeType == HLSLNodeType_Struct) { + AppendToList(statement, structs, lastStruct); + } + else if (statement->nodeType == HLSLNodeType_Declaration || + statement->nodeType == HLSLNodeType_Buffer) { + // There are cases where a struct uses a const array size, + // so those need to be ordered prior to the struct. 
+ if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration *decl = (HLSLDeclaration *)statement; + + if (decl->type.flags & HLSLTypeFlag_Const) { + // this is a global scalar, so best to order first + if (IsScalarType(decl->type.baseType)) { + AppendToList(statement, constScalarDeclarations, lastConstScalarDeclaration); + } + else { + AppendToList(statement, constDeclarations, lastConstDeclaration); + } + } + else { + AppendToList(statement, declarations, lastDeclaration); + } + } + else if (statement->nodeType == HLSLNodeType_Buffer) { + AppendToList(statement, declarations, lastDeclaration); + } + } + else if (statement->nodeType == HLSLNodeType_Function) { + AppendToList(statement, functions, lastFunction); + } + else { + AppendToList(statement, other, lastOther); + } + + statement = nextStatement; + } + + // Chain all the statements in the order that we want. + HLSLStatement *firstStatement = constScalarDeclarations; + HLSLStatement *lastStatement = lastConstScalarDeclaration; + + if (structs != NULL) { + if (firstStatement == NULL) + firstStatement = structs; + else + lastStatement->nextStatement = structs; + lastStatement = lastStruct; + } + + if (constDeclarations != NULL) { + if (firstStatement == NULL) + firstStatement = constDeclarations; + else + lastStatement->nextStatement = constDeclarations; + lastStatement = lastConstDeclaration; + } + + if (declarations != NULL) { + if (firstStatement == NULL) + firstStatement = declarations; + else + lastStatement->nextStatement = declarations; + lastStatement = lastDeclaration; + } + + if (functions != NULL) { + if (firstStatement == NULL) + firstStatement = functions; + else + lastStatement->nextStatement = functions; + lastStatement = lastFunction; + } + + if (other != NULL) { + if (firstStatement == NULL) + firstStatement = other; + else + lastStatement->nextStatement = other; + lastStatement = lastOther; + } + + root->statement = firstStatement; +} + +// First and last can be the same. 
+void AddStatements(HLSLRoot *root, HLSLStatement *before, HLSLStatement *first, HLSLStatement *last) +{ + if (before == NULL) { + last->nextStatement = root->statement; + root->statement = first; + } + else { + last->nextStatement = before->nextStatement; + before->nextStatement = first; + } +} + +void AddSingleStatement(HLSLRoot *root, HLSLStatement *before, HLSLStatement *statement) +{ + AddStatements(root, before, statement, statement); +} + +/* *X file releated +// @@ This is very game-specific. Should be moved to pipeline_parser or somewhere else. +void GroupParameters(HLSLTree * tree) +{ + // Sort parameters based on semantic and group them in cbuffers. + + HLSLRoot* root = tree->GetRoot(); + + HLSLDeclaration * firstPerItemDeclaration = NULL; + HLSLDeclaration * lastPerItemDeclaration = NULL; + + HLSLDeclaration * instanceDataDeclaration = NULL; + + HLSLDeclaration * firstPerPassDeclaration = NULL; + HLSLDeclaration * lastPerPassDeclaration = NULL; + + HLSLDeclaration * firstPerItemSampler = NULL; + HLSLDeclaration * lastPerItemSampler = NULL; + + HLSLDeclaration * firstPerPassSampler = NULL; + HLSLDeclaration * lastPerPassSampler = NULL; + + HLSLStatement * statementBeforeBuffers = NULL; + + HLSLStatement* previousStatement = NULL; + HLSLStatement* statement = root->statement; + while (statement != NULL) + { + HLSLStatement* nextStatement = statement->nextStatement; + + if (statement->nodeType == HLSLNodeType_Struct) // Do not remove this, or it will mess the else clause below. + { + statementBeforeBuffers = statement; + } + else if (statement->nodeType == HLSLNodeType_Declaration) + { + HLSLDeclaration* declaration = (HLSLDeclaration*)statement; + + // We insert buffers after the last const declaration. + if ((declaration->type.flags & HLSLTypeFlag_Const) != 0) + { + statementBeforeBuffers = statement; + } + + // Do not move samplers or static/const parameters. 
+ if ((declaration->type.flags & (HLSLTypeFlag_Static|HLSLTypeFlag_Const)) == 0) + { + // Unlink statement. + statement->nextStatement = NULL; + if (previousStatement != NULL) previousStatement->nextStatement = nextStatement; + else root->statement = nextStatement; + + while(declaration != NULL) + { + HLSLDeclaration* nextDeclaration = declaration->nextDeclaration; + + if (declaration->semantic != NULL && String_EqualNoCase(declaration->semantic, "PER_INSTANCED_ITEM")) + { + ASSERT(instanceDataDeclaration == NULL); + instanceDataDeclaration = declaration; + } + else + { + // Select group based on type and semantic. + HLSLDeclaration ** first, ** last; + if (declaration->semantic == NULL || String_EqualNoCase(declaration->semantic, "PER_ITEM") || String_EqualNoCase(declaration->semantic, "PER_MATERIAL")) + { + if (IsSamplerType(declaration->type)) + { + first = &firstPerItemSampler; + last = &lastPerItemSampler; + } + else + { + first = &firstPerItemDeclaration; + last = &lastPerItemDeclaration; + } + } + else + { + if (IsSamplerType(declaration->type)) + { + first = &firstPerPassSampler; + last = &lastPerPassSampler; + } + else + { + first = &firstPerPassDeclaration; + last = &lastPerPassDeclaration; + } + } + + // Add declaration to new list. + if (*first == NULL) *first = declaration; + else (*last)->nextStatement = declaration; + *last = declaration; + } + + // Unlink from declaration list. + declaration->nextDeclaration = NULL; + + // Reset attributes. + declaration->registerName = NULL; + //declaration->semantic = NULL; // @@ Don't do this! + + declaration = nextDeclaration; + } + } + } + //else + //{ + //if (statementBeforeBuffers == NULL) { + // This is the location where we will insert our buffers. + // statementBeforeBuffers = previousStatement; + // } + //} + + if (statement->nextStatement == nextStatement) { + previousStatement = statement; + } + statement = nextStatement; + } + + + // Add instance data declaration at the end of the per_item buffer. 
+ if (instanceDataDeclaration != NULL) + { + if (firstPerItemDeclaration == NULL) firstPerItemDeclaration = instanceDataDeclaration; + else lastPerItemDeclaration->nextStatement = instanceDataDeclaration; + } + + + // Add samplers. + if (firstPerItemSampler != NULL) { + AddStatements(root, statementBeforeBuffers, firstPerItemSampler, lastPerItemSampler); + statementBeforeBuffers = lastPerItemSampler; + } + if (firstPerPassSampler != NULL) { + AddStatements(root, statementBeforeBuffers, firstPerPassSampler, lastPerPassSampler); + statementBeforeBuffers = lastPerPassSampler; + } + + + // @@ We are assuming per_item and per_pass buffers don't already exist. @@ We should assert on that. + + if (firstPerItemDeclaration != NULL) + { + // Create buffer statement. + HLSLBuffer * perItemBuffer = tree->AddNode(firstPerItemDeclaration->fileName, firstPerItemDeclaration->line-1); + perItemBuffer->name = tree->AddString("per_item"); + perItemBuffer->registerName = tree->AddString("b0"); + perItemBuffer->field = firstPerItemDeclaration; + + // Set declaration buffer pointers. + HLSLDeclaration * field = perItemBuffer->field; + while (field != NULL) + { + field->buffer = perItemBuffer; + field = (HLSLDeclaration *)field->nextStatement; + } + + // Add buffer to statements. + AddSingleStatement(root, statementBeforeBuffers, perItemBuffer); + statementBeforeBuffers = perItemBuffer; + } + + if (firstPerPassDeclaration != NULL) + { + // Create buffer statement. + HLSLBuffer * perPassBuffer = tree->AddNode(firstPerPassDeclaration->fileName, firstPerPassDeclaration->line-1); + perPassBuffer->name = tree->AddString("per_pass"); + perPassBuffer->registerName = tree->AddString("b1"); + perPassBuffer->field = firstPerPassDeclaration; + + // Set declaration buffer pointers. + HLSLDeclaration * field = perPassBuffer->field; + while (field != NULL) + { + field->buffer = perPassBuffer; + field = (HLSLDeclaration *)field->nextStatement; + } + + // Add buffer to statements. 
+ AddSingleStatement(root, statementBeforeBuffers, perPassBuffer); + } +} +*/ + +class FindArgumentVisitor : public HLSLTreeVisitor { +public: + bool found; + const char *name; + + virtual ~FindArgumentVisitor() {} + + FindArgumentVisitor() + { + found = false; + name = NULL; + } + + bool FindArgument(const char *_name, HLSLFunction *function) + { + found = false; + name = _name; + VisitStatements(function->statement); + return found; + } + + virtual void VisitStatements(HLSLStatement *statement) override + { + while (statement != NULL && !found) { + VisitStatement(statement); + statement = statement->nextStatement; + } + } + + virtual void VisitIdentifierExpression(HLSLIdentifierExpression *node) override + { + if (node->name == name) { + found = true; + } + } +}; + +void HideUnusedArguments(HLSLFunction *function) +{ + FindArgumentVisitor visitor; + + // For each argument. + HLSLArgument *arg = function->argument; + while (arg != NULL) { + if (!visitor.FindArgument(arg->name, function)) { + arg->hidden = true; + } + + arg = arg->nextArgument; + } +} + +bool NeedsFlattening(HLSLExpression *expr, int level = 0) +{ + if (expr == NULL) { + return false; + } + if (expr->nodeType == HLSLNodeType_UnaryExpression) { + HLSLUnaryExpression *unaryExpr = (HLSLUnaryExpression *)expr; + return NeedsFlattening(unaryExpr->expression, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_BinaryExpression) { + HLSLBinaryExpression *binaryExpr = (HLSLBinaryExpression *)expr; + if (IsAssignOp(binaryExpr->binaryOp)) { + return NeedsFlattening(binaryExpr->expression2, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else { + return NeedsFlattening(binaryExpr->expression1, level + 1) || NeedsFlattening(binaryExpr->expression2, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + } + else if (expr->nodeType == HLSLNodeType_ConditionalExpression) { + HLSLConditionalExpression *conditionalExpr = 
(HLSLConditionalExpression *)expr; + return NeedsFlattening(conditionalExpr->condition, level + 1) || NeedsFlattening(conditionalExpr->trueExpression, level + 1) || NeedsFlattening(conditionalExpr->falseExpression, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_CastingExpression) { + HLSLCastingExpression *castingExpr = (HLSLCastingExpression *)expr; + return NeedsFlattening(castingExpr->expression, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_LiteralExpression) { + return NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_IdentifierExpression) { + return NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_ConstructorExpression) { + HLSLConstructorExpression *constructorExpr = (HLSLConstructorExpression *)expr; + return NeedsFlattening(constructorExpr->argument, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_MemberAccess) { + return NeedsFlattening(expr->nextExpression, level + 1); + } + else if (expr->nodeType == HLSLNodeType_ArrayAccess) { + HLSLArrayAccess *arrayAccess = (HLSLArrayAccess *)expr; + return NeedsFlattening(arrayAccess->array, level + 1) || NeedsFlattening(arrayAccess->index, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else if (expr->nodeType == HLSLNodeType_FunctionCall) { + HLSLFunctionCall *functionCall = (HLSLFunctionCall *)expr; + if (functionCall->function->numOutputArguments > 0) { + if (level > 0) { + return true; + } + } + return NeedsFlattening(functionCall->argument, level + 1) || NeedsFlattening(expr->nextExpression, level); + } + else { + //assert(false); + return false; + } +} + +struct StatementList { + HLSLStatement *head = NULL; + HLSLStatement *tail = NULL; + void append(HLSLStatement *st) + { + if (head == NULL) { + tail = head = st; + } + 
tail->nextStatement = st; + tail = st; + } +}; + +class ExpressionFlattener : public HLSLTreeVisitor { +public: + HLSLTree *m_tree; + int tmp_index; + HLSLStatement **statement_pointer; + HLSLFunction *current_function; + + ExpressionFlattener() + { + m_tree = NULL; + tmp_index = 0; + statement_pointer = NULL; + current_function = NULL; + } + virtual ~ExpressionFlattener() {} + + void FlattenExpressions(HLSLTree *tree) + { + m_tree = tree; + VisitRoot(tree->GetRoot()); + } + + // Visit all statements updating the statement_pointer so that we can insert and replace statements. @@ Add this to the default visitor? + virtual void VisitFunction(HLSLFunction *node) override + { + current_function = node; + statement_pointer = &node->statement; + VisitStatements(node->statement); + statement_pointer = NULL; + current_function = NULL; + } + + virtual void VisitComment(HLSLComment *node) override + { + // TODO: do nothing? + } + + virtual void VisitIfStatement(HLSLIfStatement *node) override + { + if (NeedsFlattening(node->condition, 1)) { + assert(false); // @@ Add statements before if statement. + } + + statement_pointer = &node->statement; + VisitStatements(node->statement); + if (node->elseStatement) { + statement_pointer = &node->elseStatement; + VisitStatements(node->elseStatement); + } + } + + virtual void VisitForStatement(HLSLForStatement *node) override + { + if (NeedsFlattening(node->initialization->assignment, 1)) { + assert(false); // @@ Add statements before for statement. + } + if (NeedsFlattening(node->condition, 1) || NeedsFlattening(node->increment, 1)) { + assert(false); // @@ These are tricky to implement. Need to handle all loop exits. 
+ } + + statement_pointer = &node->statement; + VisitStatements(node->statement); + } + + virtual void VisitBlockStatement(HLSLBlockStatement *node) override + { + statement_pointer = &node->statement; + VisitStatements(node->statement); + } + + virtual void VisitStatements(HLSLStatement *statement) override + { + while (statement != NULL) { + VisitStatement(statement); + statement_pointer = &statement->nextStatement; + statement = statement->nextStatement; + } + } + + // This is usually a function call or assignment. + virtual void VisitExpressionStatement(HLSLExpressionStatement *node) override + { + if (NeedsFlattening(node->expression, 0)) { + StatementList statements; + Flatten(node->expression, statements, false); + + // Link beginning of statement list. + *statement_pointer = statements.head; + + // Link end of statement list. + HLSLStatement *tail = statements.tail; + tail->nextStatement = node->nextStatement; + + // Update statement pointer. + statement_pointer = &tail->nextStatement; + + // @@ Delete node? + } + } + + virtual void VisitDeclaration(HLSLDeclaration *node) override + { + // Skip global declarations. + if (statement_pointer == NULL) return; + + if (NeedsFlattening(node->assignment, 1)) { + StatementList statements; + HLSLIdentifierExpression *ident = Flatten(node->assignment, statements, true); + + // @@ Delete node->assignment? + + node->assignment = ident; + statements.append(node); + + // Link beginning of statement list. + *statement_pointer = statements.head; + + // Link end of statement list. + HLSLStatement *tail = statements.tail; + tail->nextStatement = node->nextStatement; + + // Update statement pointer. + statement_pointer = &tail->nextStatement; + } + } + + virtual void VisitReturnStatement(HLSLReturnStatement *node) override + { + if (NeedsFlattening(node->expression, 1)) { + StatementList statements; + HLSLIdentifierExpression *ident = Flatten(node->expression, statements, true); + + // @@ Delete node->expression? 
+ + node->expression = ident; + statements.append(node); + + // Link beginning of statement list. + *statement_pointer = statements.head; + + // Link end of statement list. + HLSLStatement *tail = statements.tail; + tail->nextStatement = node->nextStatement; + + // Update statement pointer. + statement_pointer = &tail->nextStatement; + } + } + + HLSLDeclaration *BuildTemporaryDeclaration(HLSLExpression *expr) + { + assert(expr->expressionType.baseType != HLSLBaseType_Void); + + HLSLDeclaration *declaration = m_tree->AddNode(expr->fileName, expr->line); + declaration->name = m_tree->AddStringFormat("tmp%d", tmp_index++); + declaration->type = expr->expressionType; + declaration->assignment = expr; + + //HLSLIdentifierExpression * ident = (HLSLIdentifierExpression *)expr; + + return declaration; + } + + HLSLExpressionStatement *BuildExpressionStatement(HLSLExpression *expr) + { + HLSLExpressionStatement *statement = m_tree->AddNode(expr->fileName, expr->line); + statement->expression = expr; + return statement; + } + + HLSLIdentifierExpression *AddExpressionStatement(HLSLExpression *expr, StatementList &statements, bool wantIdent) + { + if (wantIdent) { + HLSLDeclaration *declaration = BuildTemporaryDeclaration(expr); + statements.append(declaration); + + HLSLIdentifierExpression *ident = m_tree->AddNode(expr->fileName, expr->line); + ident->name = declaration->name; + ident->expressionType = declaration->type; + return ident; + } + else { + HLSLExpressionStatement *statement = BuildExpressionStatement(expr); + statements.append(statement); + return NULL; + } + } + + HLSLIdentifierExpression *Flatten(HLSLExpression *expr, StatementList &statements, bool wantIdent = true) + { + if (!NeedsFlattening(expr, wantIdent)) { + return AddExpressionStatement(expr, statements, wantIdent); + } + + if (expr->nodeType == HLSLNodeType_UnaryExpression) { + assert(expr->nextExpression == NULL); + + HLSLUnaryExpression *unaryExpr = (HLSLUnaryExpression *)expr; + + 
HLSLIdentifierExpression *tmp = Flatten(unaryExpr->expression, statements, true); + + HLSLUnaryExpression *newUnaryExpr = m_tree->AddNode(unaryExpr->fileName, unaryExpr->line); + newUnaryExpr->unaryOp = unaryExpr->unaryOp; + newUnaryExpr->expression = tmp; + newUnaryExpr->expressionType = unaryExpr->expressionType; + + return AddExpressionStatement(newUnaryExpr, statements, wantIdent); + } + else if (expr->nodeType == HLSLNodeType_BinaryExpression) { + assert(expr->nextExpression == NULL); + + HLSLBinaryExpression *binaryExpr = (HLSLBinaryExpression *)expr; + + if (IsAssignOp(binaryExpr->binaryOp)) { + // Flatten right hand side only. + HLSLIdentifierExpression *tmp2 = Flatten(binaryExpr->expression2, statements, true); + + HLSLBinaryExpression *newBinaryExpr = m_tree->AddNode(binaryExpr->fileName, binaryExpr->line); + newBinaryExpr->binaryOp = binaryExpr->binaryOp; + newBinaryExpr->expression1 = binaryExpr->expression1; + newBinaryExpr->expression2 = tmp2; + newBinaryExpr->expressionType = binaryExpr->expressionType; + + return AddExpressionStatement(newBinaryExpr, statements, wantIdent); + } + else { + HLSLIdentifierExpression *tmp1 = Flatten(binaryExpr->expression1, statements, true); + HLSLIdentifierExpression *tmp2 = Flatten(binaryExpr->expression2, statements, true); + + HLSLBinaryExpression *newBinaryExpr = m_tree->AddNode(binaryExpr->fileName, binaryExpr->line); + newBinaryExpr->binaryOp = binaryExpr->binaryOp; + newBinaryExpr->expression1 = tmp1; + newBinaryExpr->expression2 = tmp2; + newBinaryExpr->expressionType = binaryExpr->expressionType; + + return AddExpressionStatement(newBinaryExpr, statements, wantIdent); + } + } + else if (expr->nodeType == HLSLNodeType_ConditionalExpression) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_CastingExpression) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_LiteralExpression) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_IdentifierExpression) { + assert(false); 
+ } + else if (expr->nodeType == HLSLNodeType_ConstructorExpression) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_MemberAccess) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_ArrayAccess) { + assert(false); + } + else if (expr->nodeType == HLSLNodeType_FunctionCall) { + HLSLFunctionCall *functionCall = (HLSLFunctionCall *)expr; + + // @@ Output function as is? + // @@ We have to flatten function arguments! This is tricky, need to handle input/output arguments. + assert(!NeedsFlattening(functionCall->argument)); + + return AddExpressionStatement(expr, statements, wantIdent); + } + else { + assert(false); + } + return NULL; + } +}; + +void FlattenExpressions(HLSLTree *tree) +{ + ExpressionFlattener flattener; + flattener.FlattenExpressions(tree); +} + +} //namespace M4 diff --git a/hlslparser/src/HLSLTree.h b/hlslparser/src/HLSLTree.h new file mode 100644 index 00000000..39956bac --- /dev/null +++ b/hlslparser/src/HLSLTree.h @@ -0,0 +1,824 @@ +#pragma once + +#include + +#include "Engine.h" + +namespace M4 { + +enum HLSLTarget { + HLSLTarget_VertexShader, + HLSLTarget_PixelShader, + + HLSLTarget_ComputeShader, + + // none of these are portable to Metal/Android, they have own triangulation + // HLSLTarget_GeometryShader, + // HLSLTarget_HullShader, + // HLSLTarget_ControlShader, + + // This is compute prior to frag (combined vertex + geo state) + // HLSLTarget_MeshShader, +}; + +enum HLSLNodeType { + HLSLNodeType_Root, + + HLSLNodeType_Declaration, + HLSLNodeType_Struct, + HLSLNodeType_StructField, + HLSLNodeType_Buffer, + HLSLNodeType_BufferField, // TODO: or just ref structField + + HLSLNodeType_Function, + HLSLNodeType_Argument, + + HLSLNodeType_ExpressionStatement, + HLSLNodeType_Expression, + HLSLNodeType_ReturnStatement, + HLSLNodeType_DiscardStatement, + HLSLNodeType_BreakStatement, + HLSLNodeType_ContinueStatement, + HLSLNodeType_IfStatement, + HLSLNodeType_ForStatement, + HLSLNodeType_BlockStatement, + 
HLSLNodeType_UnaryExpression, + HLSLNodeType_BinaryExpression, + HLSLNodeType_ConditionalExpression, + HLSLNodeType_CastingExpression, + HLSLNodeType_LiteralExpression, + HLSLNodeType_IdentifierExpression, + HLSLNodeType_ConstructorExpression, + HLSLNodeType_MemberAccess, + HLSLNodeType_ArrayAccess, + HLSLNodeType_FunctionCall, + HLSLNodeType_MemberFunctionCall, + + /* FX file stuff + HLSLNodeType_StateAssignment, + HLSLNodeType_SamplerState, + HLSLNodeType_Pass, + HLSLNodeType_Technique, + HLSLNodeType_Pipeline, + HLSLNodeType_Stage, + */ + + HLSLNodeType_Attribute, + HLSLNodeType_Comment +}; + +enum HLSLBaseType { + HLSLBaseType_Unknown, + HLSLBaseType_Void, + + // float + HLSLBaseType_Float, + HLSLBaseType_Float2, + HLSLBaseType_Float3, + HLSLBaseType_Float4, + HLSLBaseType_Float2x2, + HLSLBaseType_Float3x3, + HLSLBaseType_Float4x4, + + HLSLBaseType_Half, + HLSLBaseType_Half2, + HLSLBaseType_Half3, + HLSLBaseType_Half4, + HLSLBaseType_Half2x2, + HLSLBaseType_Half3x3, + HLSLBaseType_Half4x4, + + HLSLBaseType_Double, + HLSLBaseType_Double2, + HLSLBaseType_Double3, + HLSLBaseType_Double4, + HLSLBaseType_Double2x2, + HLSLBaseType_Double3x3, + HLSLBaseType_Double4x4, + + // integer + HLSLBaseType_Bool, + HLSLBaseType_Bool2, + HLSLBaseType_Bool3, + HLSLBaseType_Bool4, + + HLSLBaseType_Int, + HLSLBaseType_Int2, + HLSLBaseType_Int3, + HLSLBaseType_Int4, + + HLSLBaseType_Uint, + HLSLBaseType_Uint2, + HLSLBaseType_Uint3, + HLSLBaseType_Uint4, + + HLSLBaseType_Short, + HLSLBaseType_Short2, + HLSLBaseType_Short3, + HLSLBaseType_Short4, + + HLSLBaseType_Ushort, + HLSLBaseType_Ushort2, + HLSLBaseType_Ushort3, + HLSLBaseType_Ushort4, + + HLSLBaseType_Long, + HLSLBaseType_Long2, + HLSLBaseType_Long3, + HLSLBaseType_Long4, + + HLSLBaseType_Ulong, + HLSLBaseType_Ulong2, + HLSLBaseType_Ulong3, + HLSLBaseType_Ulong4, + + // Seems like these should be subtype of HLSLTexture, but + // many of the intrinsics require a specific type of texture. 
+ // MSL has many more types, included depth vs. regular textures. + HLSLBaseType_Texture2D, + HLSLBaseType_Texture3D, + HLSLBaseType_TextureCube, + HLSLBaseType_Texture2DArray, + HLSLBaseType_TextureCubeArray, + HLSLBaseType_Texture2DMS, + + HLSLBaseType_Depth2D, + HLSLBaseType_Depth2DArray, + HLSLBaseType_DepthCube, + // TODO: add more depth types as needed (pair with SamplerComparisonState) + + HLSLBaseType_RWTexture2D, + + // Only 2 sampler types. - type is for defining state inside them + HLSLBaseType_SamplerState, + HLSLBaseType_SamplerComparisonState, + + HLSLBaseType_UserDefined, // struct + HLSLBaseType_Expression, // type argument for defined() sizeof() and typeof(). + //HLSLBaseType_Auto, // this wasn't hooked up + HLSLBaseType_Comment, // single line comments optionally transferred to output + + // Buffer subtypes below + HLSLBaseType_Buffer, + + HLSLBaseType_Count, + + // counts + //HLSLBaseType_FirstNumeric = HLSLBaseType_Float, + //HLSLBaseType_LastNumeric = HLSLBaseType_Ulong4, + + //HLSLBaseType_FirstInteger = HLSLBaseType_Bool, + //HLSLBaseType_LastInteger = HLSLBaseType_LastNumeric, + + HLSLBaseType_NumericCount = HLSLBaseType_Ulong4 - HLSLBaseType_Float + 1 +}; + +// This a subtype to HLSLBaseType_Buffer +enum HLSLBufferType { + // DX9 + HLSLBufferType_CBuffer, + HLSLBufferType_TBuffer, + + // DX10 templated types + HLSLBufferType_ConstantBuffer, // indexable + HLSLBufferType_StructuredBuffer, + HLSLBufferType_RWStructuredBuffer, + HLSLBufferType_ByteAddressBuffer, + HLSLBufferType_RWByteAddressBuffer +}; + +enum HLSLBinaryOp { + // bit ops + HLSLBinaryOp_And, + HLSLBinaryOp_Or, + + // math ops + HLSLBinaryOp_Add, + HLSLBinaryOp_Sub, + HLSLBinaryOp_Mul, + HLSLBinaryOp_Div, + + // comparison ops + HLSLBinaryOp_Less, + HLSLBinaryOp_Greater, + HLSLBinaryOp_LessEqual, + HLSLBinaryOp_GreaterEqual, + HLSLBinaryOp_Equal, + HLSLBinaryOp_NotEqual, + + // bit ops + HLSLBinaryOp_BitAnd, + HLSLBinaryOp_BitOr, + HLSLBinaryOp_BitXor, + + // assign ops + 
HLSLBinaryOp_Assign, + HLSLBinaryOp_AddAssign, + HLSLBinaryOp_SubAssign, + HLSLBinaryOp_MulAssign, + HLSLBinaryOp_DivAssign, +}; + +inline bool IsCompareOp(HLSLBinaryOp op) +{ + return op == HLSLBinaryOp_Less || + op == HLSLBinaryOp_Greater || + op == HLSLBinaryOp_LessEqual || + op == HLSLBinaryOp_GreaterEqual || + op == HLSLBinaryOp_Equal || + op == HLSLBinaryOp_NotEqual; +} + +inline bool IsArithmeticOp(HLSLBinaryOp op) +{ + return op == HLSLBinaryOp_Add || + op == HLSLBinaryOp_Sub || + op == HLSLBinaryOp_Mul || + op == HLSLBinaryOp_Div; +} + +inline bool IsLogicOp(HLSLBinaryOp op) +{ + return op == HLSLBinaryOp_And || + op == HLSLBinaryOp_Or; +} + +inline bool IsAssignOp(HLSLBinaryOp op) +{ + return op == HLSLBinaryOp_Assign || + op == HLSLBinaryOp_AddAssign || + op == HLSLBinaryOp_SubAssign || + op == HLSLBinaryOp_MulAssign || + op == HLSLBinaryOp_DivAssign; +} + +inline bool IsBitOp(HLSLBinaryOp op) +{ + return op == HLSLBinaryOp_BitAnd || + op == HLSLBinaryOp_BitOr || + op == HLSLBinaryOp_BitXor; +} + +enum HLSLUnaryOp { + HLSLUnaryOp_Negative, // -x + HLSLUnaryOp_Positive, // +x + HLSLUnaryOp_Not, // !x + HLSLUnaryOp_PreIncrement, // ++x + HLSLUnaryOp_PreDecrement, // --x + HLSLUnaryOp_PostIncrement, // x++ + HLSLUnaryOp_PostDecrement, // x++ + HLSLUnaryOp_BitNot, // ~x +}; + +enum HLSLArgumentModifier { + HLSLArgumentModifier_None, + HLSLArgumentModifier_In, + HLSLArgumentModifier_Out, + HLSLArgumentModifier_Inout, + HLSLArgumentModifier_Uniform, + HLSLArgumentModifier_Const, +}; + +enum HLSLTypeFlags { + HLSLTypeFlag_None = 0, + HLSLTypeFlag_Const = 0x01, + HLSLTypeFlag_Static = 0x02, + //HLSLTypeFlag_Uniform = 0x04, + //HLSLTypeFlag_Extern = 0x10, + //HLSLTypeFlag_Volatile = 0x20, + //HLSLTypeFlag_Shared = 0x40, + //HLSLTypeFlag_Precise = 0x80, + + HLSLTypeFlag_Input = 0x100, + HLSLTypeFlag_Output = 0x200, + + // Interpolation modifiers. 
+ HLSLTypeFlag_Linear = 0x10000, + HLSLTypeFlag_Centroid = 0x20000, + HLSLTypeFlag_NoInterpolation = 0x40000, + HLSLTypeFlag_NoPerspective = 0x80000, + HLSLTypeFlag_Sample = 0x100000, + + // Misc. + HLSLTypeFlag_NoPromote = 0x200000, +}; + +enum HLSLAttributeType { + HLSLAttributeType_Unknown, + + // TODO: a lot more attributes, these are loop attributes + // f.e. specialization constant and numthreads for HLSL + HLSLAttributeType_Unroll, + HLSLAttributeType_Branch, + HLSLAttributeType_Flatten, + HLSLAttributeType_NoFastMath, + +}; + +enum HLSLAddressSpace { + HLSLAddressSpace_Undefined, + + // These only apply to MSL + HLSLAddressSpace_Constant, + HLSLAddressSpace_Device, + HLSLAddressSpace_Thread, + HLSLAddressSpace_Shared, + // TODO: Threadgroup, + // TODO: ThreadgroupImageblock +}; + +struct HLSLNode; +struct HLSLRoot; +struct HLSLStatement; +struct HLSLAttribute; +struct HLSLDeclaration; +struct HLSLStruct; +struct HLSLStructField; +struct HLSLBuffer; +struct HLSLFunction; +struct HLSLArgument; +struct HLSLExpressionStatement; +struct HLSLExpression; +struct HLSLBinaryExpression; +struct HLSLLiteralExpression; +struct HLSLIdentifierExpression; +struct HLSLConstructorExpression; +struct HLSLFunctionCall; +struct HLSLArrayAccess; +struct HLSLAttribute; + +struct HLSLType { + explicit HLSLType(HLSLBaseType _baseType = HLSLBaseType_Unknown) + { + baseType = _baseType; + } + bool TestFlags(int flags_) const { return (flags & flags_) == flags_; } + + HLSLBaseType baseType = HLSLBaseType_Unknown; + HLSLBaseType formatType = HLSLBaseType_Float; // Half or Float (only applies to templated params like buffer/texture) + const char* typeName = NULL; // For user defined types. + bool array = false; + HLSLExpression* arraySize = NULL; // can ref constant like NUM_LIGHTS + int flags = 0; + HLSLAddressSpace addressSpace = HLSLAddressSpace_Undefined; // MSL mostly +}; + +// Only Statment, Argument, StructField can be marked hidden. 
+// But many elements like Buffer derive from Statement. + +/// Base class for all nodes in the HLSL AST +struct HLSLNode { + HLSLNodeType nodeType; // set to s_type + const char* fileName = NULL; + int line = 0; +}; + +struct HLSLRoot : public HLSLNode { + static const HLSLNodeType s_type = HLSLNodeType_Root; + HLSLStatement* statement = NULL; // First statement. +}; + +struct HLSLStatement : public HLSLNode { + HLSLStatement* nextStatement = NULL; // Next statement in the block. + HLSLAttribute* attributes = NULL; + + // This allows tree pruning. Marked true after traversing use in + mutable bool hidden = false; + + // This is marked as false at start, and multi endpoint traversal marks + // when a global is already written, and next write is skipped. + mutable bool written = false; +}; + +// [unroll] +struct HLSLAttribute : public HLSLNode { + static const HLSLNodeType s_type = HLSLNodeType_Attribute; + HLSLAttributeType attributeType = HLSLAttributeType_Unknown; + HLSLExpression* argument = NULL; + HLSLAttribute* nextAttribute = NULL; +}; + +struct HLSLDeclaration : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_Declaration; + const char* name = NULL; + HLSLType type; + const char* registerName = NULL; // @@ Store register index? + const char* semantic = NULL; + HLSLDeclaration* nextDeclaration = NULL; // If multiple variables declared on a line. + HLSLExpression* assignment = NULL; + + HLSLBuffer* buffer = NULL; // reference cbuffer for decl +}; + +struct HLSLStruct : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_Struct; + const char* name = NULL; + HLSLStructField* field = NULL; // First field in the structure. +}; + +struct HLSLStructField : public HLSLNode { + static const HLSLNodeType s_type = HLSLNodeType_StructField; + const char* name = NULL; + HLSLType type; + const char* semantic = NULL; + const char* sv_semantic = NULL; + HLSLStructField* nextField = NULL; // Next field in the structure. 
+ bool hidden = false; +}; + +/// Buffer declaration. +struct HLSLBuffer : public HLSLStatement { + // These spill a ton of globals throughout shader + bool IsGlobalFields() const + { + return bufferType == HLSLBufferType_CBuffer || + bufferType == HLSLBufferType_TBuffer; + } + + // DX changes registers for read-only vs. read-write buffers (SRV vs. UAV) + // so constant/cbuffer use b, structured/byte use t (like textures), + // and read-write use u. MSL only has u and + bool IsReadOnly() const + { + return bufferType == HLSLBufferType_CBuffer || + bufferType == HLSLBufferType_TBuffer || + bufferType == HLSLBufferType_ConstantBuffer || + bufferType == HLSLBufferType_StructuredBuffer || + bufferType == HLSLBufferType_ByteAddressBuffer; + } + + static const HLSLNodeType s_type = HLSLNodeType_Buffer; + const char* name = NULL; + const char* registerName = NULL; + HLSLDeclaration* field = NULL; + HLSLBufferType bufferType = HLSLBufferType_CBuffer; + HLSLStruct* bufferStruct = NULL; +}; + +/// Function declaration +struct HLSLFunction : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_Function; + const char* name = NULL; + HLSLType returnType; + HLSLBaseType memberType = HLSLBaseType_Unknown; // for sampler members, must also look at GetScalarType(returnType) + const char* semantic = NULL; + const char* sv_semantic = NULL; + int numArguments = 0; + int numOutputArguments = 0; // Includes out and inout arguments. + HLSLArgument* argument = NULL; + HLSLStatement* statement = NULL; + HLSLFunction* forward = NULL; // Which HLSLFunction this one forward-declares + + bool IsMemberFunction() const { return memberType != HLSLBaseType_Unknown; } +}; + +/// Declaration of an argument to a function. 
+struct HLSLArgument : public HLSLNode { + static const HLSLNodeType s_type = HLSLNodeType_Argument; + const char* name = NULL; + HLSLArgumentModifier modifier = HLSLArgumentModifier_None; + HLSLType type; + const char* semantic = NULL; + const char* sv_semantic = NULL; + HLSLExpression* defaultValue = NULL; + HLSLArgument* nextArgument = NULL; + bool hidden = false; +}; + +/// A expression which forms a complete statement. +struct HLSLExpressionStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_ExpressionStatement; + HLSLExpression* expression = NULL; +}; + +struct HLSLReturnStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_ReturnStatement; + HLSLExpression* expression = NULL; +}; + +struct HLSLDiscardStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_DiscardStatement; +}; + +struct HLSLBreakStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_BreakStatement; +}; + +struct HLSLContinueStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_ContinueStatement; +}; + +struct HLSLIfStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_IfStatement; + HLSLExpression* condition = NULL; + HLSLStatement* statement = NULL; + HLSLStatement* elseStatement = NULL; + bool isStatic = false; +}; + +struct HLSLForStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_ForStatement; + HLSLDeclaration* initialization = NULL; + HLSLExpression* condition = NULL; + HLSLExpression* increment = NULL; + HLSLStatement* statement = NULL; +}; + +struct HLSLBlockStatement : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_BlockStatement; + HLSLStatement* statement = NULL; +}; + +/// Base type for all types of expressions. 
+struct HLSLExpression : public HLSLNode { + static const HLSLNodeType s_type = HLSLNodeType_Expression; + HLSLType expressionType; + HLSLExpression* nextExpression = NULL; // Used when the expression is part of a list, like in a function call. +}; + +// -a +struct HLSLUnaryExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_UnaryExpression; + HLSLUnaryOp unaryOp = {}; + HLSLExpression* expression = NULL; +}; + +/// a + b +struct HLSLBinaryExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_BinaryExpression; + HLSLBinaryOp binaryOp = {}; + HLSLExpression* expression1 = NULL; + HLSLExpression* expression2 = NULL; +}; + +/// ? : construct +struct HLSLConditionalExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_ConditionalExpression; + HLSLExpression* condition = NULL; + HLSLExpression* trueExpression = NULL; + HLSLExpression* falseExpression = NULL; +}; + +/// v = (half4)v2 +struct HLSLCastingExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_CastingExpression; + HLSLType type; + HLSLExpression* expression = NULL; +}; + +/// Float, integer, boolean, etc. literal constant. +struct HLSLLiteralExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_LiteralExpression; + HLSLBaseType type = HLSLBaseType_Unknown; // Note, not all types can be literals. + union { + bool bValue; + float fValue; + int32_t iValue; + }; +}; + +/// An identifier, typically a variable name or structure field name. +struct HLSLIdentifierExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_IdentifierExpression; + const char* name = NULL; + bool global = false; // This is a global variable. 
+}; + +/// float2(1, 2) +struct HLSLConstructorExpression : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_ConstructorExpression; + HLSLType type; + HLSLExpression* argument = NULL; +}; + +/// object.member input.member or input[10].member +struct HLSLMemberAccess : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_MemberAccess; + HLSLExpression* object = NULL; + const char* field = NULL; + bool swizzle = false; +}; + +/// array[index] +struct HLSLArrayAccess : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_ArrayAccess; + HLSLExpression* array = NULL; + HLSLExpression* index = NULL; +}; + +/// c-style foo(arg1, arg2) - args can have defaults that are parsed +struct HLSLFunctionCall : public HLSLExpression { + static const HLSLNodeType s_type = HLSLNodeType_FunctionCall; + const HLSLFunction* function = NULL; + HLSLExpression* argument = NULL; + int numArguments = 0; +}; + +// TODO: finish adding this for texture and buffer ops +/// c++ style member.foo(arg1, arg2) +struct HLSLMemberFunctionCall : public HLSLFunctionCall { + static const HLSLNodeType s_type = HLSLNodeType_MemberFunctionCall; + + // could be buffer, texture, raytrace + const HLSLIdentifierExpression* memberIdentifier = NULL; +}; + +#if 1 +/* +// These are all FX file constructs +// TODO: may remove these, they just complicate the code +// but do want to specify mix of vs/ps/cs in single files + +// fx +struct HLSLStateAssignment : public HLSLNode +{ + static const HLSLNodeType s_type = HLSLNodeType_StateAssignment; + const char* stateName = NULL; + int d3dRenderState = 0; + union { + int32_t iValue; + float fValue; + const char * sValue; + }; + HLSLStateAssignment* nextStateAssignment = NULL; +}; + +// fx +struct HLSLSamplerState : public HLSLExpression // @@ Does this need to be an expression? Does it have a type? I guess type is useful. 
+{ + static const HLSLNodeType s_type = HLSLNodeType_SamplerState; + int numStateAssignments = 0; + HLSLStateAssignment* stateAssignments = NULL; +}; + +// fx +struct HLSLPass : public HLSLNode +{ + static const HLSLNodeType s_type = HLSLNodeType_Pass; + const char* name = NULL; + int numStateAssignments = 0; + HLSLStateAssignment* stateAssignments = NULL; + HLSLPass* nextPass = NULL; +}; + +// fx +struct HLSLTechnique : public HLSLStatement +{ + static const HLSLNodeType s_type = HLSLNodeType_Technique; + const char* name = NULL; + int numPasses = 0; + HLSLPass* passes = NULL; +}; + +// fx +struct HLSLPipeline : public HLSLStatement +{ + static const HLSLNodeType s_type = HLSLNodeType_Pipeline; + const char* name = NULL; + int numStateAssignments = 0; + HLSLStateAssignment* stateAssignments = NULL; +}; + +// fx +struct HLSLStage : public HLSLStatement +{ + static const HLSLNodeType s_type = HLSLNodeType_Stage; + const char* name = NULL; + HLSLStatement* statement = NULL; + HLSLDeclaration* inputs = NULL; + HLSLDeclaration* outputs = NULL; +}; +*/ +#endif + +struct HLSLComment : public HLSLStatement { + static const HLSLNodeType s_type = HLSLNodeType_Comment; + const char* text = NULL; +}; + +/// Abstract syntax tree for parsed HLSL code. +class HLSLTree { +public: + explicit HLSLTree(Allocator* allocator); + ~HLSLTree(); + + /// Adds a string to the string pool used by the tree. + const char* AddString(const char* string); + const char* AddStringFormat(const char* string, ...) M4_PRINTF_ATTR(2, 3); + + /// Returns true if the string is contained within the tree. + bool GetContainsString(const char* string) const; + + /// Returns the root block in the tree */ + HLSLRoot* GetRoot() const; + + /// Adds a new node to the tree with the specified type. 
+ template + T* AddNode(const char* fileName, int line) + { + HLSLNode* node = new (AllocateMemory(sizeof(T))) T(); + node->nodeType = T::s_type; + node->fileName = fileName; + node->line = line; + return static_cast(node); + } + + HLSLFunction* FindFunction(const char* name); + HLSLDeclaration* FindGlobalDeclaration(const char* name, HLSLBuffer** buffer_out = NULL); + + HLSLStruct* FindGlobalStruct(const char* name); + HLSLBuffer* FindBuffer(const char* name); + + // FX files + // HLSLTechnique * FindTechnique(const char * name); + // HLSLPipeline * FindFirstPipeline(); + // HLSLPipeline * FindNextPipeline(HLSLPipeline * current); + // HLSLPipeline * FindPipeline(const char * name); + + bool GetExpressionValue(HLSLExpression* expression, int& value); + int GetExpressionValue(HLSLExpression* expression, float values[4]); + + bool NeedsFunction(const char* name); + +private: + void* AllocateMemory(size_t size); + void AllocatePage(); + +private: + static const size_t s_nodePageSize = 1024 * 4; + + struct NodePage { + NodePage* next; + char buffer[s_nodePageSize]; + }; + + Allocator* m_allocator; + StringPool m_stringPool; + HLSLRoot* m_root; + + NodePage* m_firstPage; + NodePage* m_currentPage; + size_t m_currentPageOffset; +}; + +class HLSLTreeVisitor { +public: + virtual ~HLSLTreeVisitor() {} + virtual void VisitType(HLSLType& type); + + virtual void VisitRoot(HLSLRoot* node); + virtual void VisitTopLevelStatement(HLSLStatement* node); + virtual void VisitStatements(HLSLStatement* statement); + virtual void VisitStatement(HLSLStatement* node); + virtual void VisitDeclaration(HLSLDeclaration* node); + virtual void VisitStruct(HLSLStruct* node); + virtual void VisitStructField(HLSLStructField* node); + virtual void VisitBuffer(HLSLBuffer* node); + //virtual void VisitBufferField(HLSLBufferField * node); // TODO: + virtual void VisitFunction(HLSLFunction* node); + virtual void VisitArgument(HLSLArgument* node); + virtual void 
VisitExpressionStatement(HLSLExpressionStatement* node); + virtual void VisitExpression(HLSLExpression* node); + virtual void VisitReturnStatement(HLSLReturnStatement* node); + virtual void VisitDiscardStatement(HLSLDiscardStatement* node); + virtual void VisitBreakStatement(HLSLBreakStatement* node); + virtual void VisitContinueStatement(HLSLContinueStatement* node); + virtual void VisitIfStatement(HLSLIfStatement* node); + virtual void VisitForStatement(HLSLForStatement* node); + virtual void VisitBlockStatement(HLSLBlockStatement* node); + virtual void VisitUnaryExpression(HLSLUnaryExpression* node); + virtual void VisitBinaryExpression(HLSLBinaryExpression* node); + virtual void VisitConditionalExpression(HLSLConditionalExpression* node); + virtual void VisitCastingExpression(HLSLCastingExpression* node); + virtual void VisitLiteralExpression(HLSLLiteralExpression* node); + virtual void VisitIdentifierExpression(HLSLIdentifierExpression* node); + virtual void VisitConstructorExpression(HLSLConstructorExpression* node); + virtual void VisitMemberAccess(HLSLMemberAccess* node); + virtual void VisitArrayAccess(HLSLArrayAccess* node); + virtual void VisitFunctionCall(HLSLFunctionCall* node); + + virtual void VisitComment(HLSLComment* node); + + virtual void VisitFunctions(HLSLRoot* root); + virtual void VisitParameters(HLSLRoot* root); + + HLSLFunction* FindFunction(HLSLRoot* root, const char* name); + HLSLDeclaration* FindGlobalDeclaration(HLSLRoot* root, const char* name); + HLSLStruct* FindGlobalStruct(HLSLRoot* root, const char* name); + + // These are fx file constructs + // virtual void VisitStateAssignment(HLSLStateAssignment * node); + // virtual void VisitSamplerState(HLSLSamplerState * node); + // virtual void VisitPass(HLSLPass * node); + // virtual void VisitTechnique(HLSLTechnique * node); + // virtual void VisitPipeline(HLSLPipeline * node); +}; + +// Tree transformations: +extern void PruneTree(HLSLTree* tree, const char* entryName0, const char* 
entryName1 = NULL); +extern void SortTree(HLSLTree* tree); +//extern void GroupParameters(HLSLTree* tree); +extern void HideUnusedArguments(HLSLFunction* function); +extern void FlattenExpressions(HLSLTree* tree); + +} //namespace M4 diff --git a/hlslparser/src/MSLGenerator.cpp b/hlslparser/src/MSLGenerator.cpp new file mode 100644 index 00000000..faffb254 --- /dev/null +++ b/hlslparser/src/MSLGenerator.cpp @@ -0,0 +1,2119 @@ +//============================================================================= +// +// Render/MSLGenerator.cpp +// +// Created by Max McGuire (max@unknownworlds.com) +// Copyright (c) 2013, Unknown Worlds Entertainment, Inc. +// +//============================================================================= + +#include "MSLGenerator.h" + +#include + +#include "Engine.h" +#include "HLSLParser.h" +#include "HLSLTree.h" + +// MSL limitations: +// - Some type conversions and constructors don't work exactly the same way. For example, casts to smaller size vectors are not alloweed in C++. @@ Add more details... +// - Swizzles on scalar types, whether or not it expands them. a_float.x, a_float.xxxx both cause compile errors. +// - Using ints as floats without the trailing .0 makes the compiler sad. +// Unsupported by this generator: +// - Matrix [] access is implemented as a function call, so result cannot be passed as out/inout argument. +// - Matrix [] access is not supported in all l-value expressions. Only simple assignments. +// - No support for boolean vectors and logical operators involving vectors. This is not just in metal. 
+// - No support for non-float texture types + +namespace M4 { +static void ParseSemantic(const char* semantic, uint32_t* outputLength, uint32_t* outputIndex) +{ + const char* semanticIndex = semantic; + + while (*semanticIndex && !isdigit(*semanticIndex)) { + semanticIndex++; + } + + *outputLength = (uint32_t)(semanticIndex - semantic); + *outputIndex = atoi(semanticIndex); +} + +// Parse register name and advance next register index. +static int ParseRegister(const char* registerName, int& nextRegister) +{ + if (!registerName) { + return nextRegister++; + } + + // skip over the u/b/t register prefix + while (*registerName && !isdigit(*registerName)) { + registerName++; + } + + if (!*registerName) { + return nextRegister++; + } + + // parse the number + int result = atoi(registerName); + + if (nextRegister <= result) { + nextRegister = result + 1; + } + + return result; +} + +MSLGenerator::MSLGenerator() +{ + m_tree = NULL; + m_entryName = NULL; + m_target = HLSLTarget_VertexShader; + m_error = false; + + m_firstClassArgument = NULL; + m_lastClassArgument = NULL; + + m_currentFunction = NULL; +} + +// Copied from GLSLGenerator +void MSLGenerator::Error(const char* format, ...) const +{ + // It's not always convenient to stop executing when an error occurs, + // so just track once we've hit an error and stop reporting them until + // we successfully bail out of execution. + if (m_error) { + return; + } + m_error = true; + + va_list arg; + va_start(arg, format); + Log_ErrorArgList(format, arg); + va_end(arg); +} + +inline void MSLGenerator::AddClassArgument(ClassArgument* arg) +{ + if (m_firstClassArgument == NULL) { + m_firstClassArgument = arg; + } + else { + m_lastClassArgument->nextArg = arg; + } + m_lastClassArgument = arg; +} + +void MSLGenerator::Prepass(HLSLTree* tree, HLSLTarget target, HLSLFunction* entryFunction) +{ + // Hide unused arguments. @@ It would be good to do this in the other generators too. 
+ + // PruneTree resets hidden flags to true, then marks visible elements + // based on whether entry point visits them. + PruneTree(tree, entryFunction->name); // Note: takes second entry + + // This sorts tree by type, but keeps ordering + SortTree(tree); + + // This strips any unused inputs to the entry point function + HideUnusedArguments(entryFunction); + + // Note sure if/where to add these calls. Just wanted to point + // out that nothing is calling them, but could be useful. + FlattenExpressions(tree); + + HLSLRoot* root = tree->GetRoot(); + HLSLStatement* statement = root->statement; + ASSERT(m_firstClassArgument == NULL); + + //HLSLType samplerType(HLSLBaseType_Sampler); + + int nextTextureRegister = 0; + int nextSamplerRegister = 0; + int nextBufferRegister = 0; + + while (statement != NULL) { + if (statement->hidden) { + statement = statement->nextStatement; + continue; + } + + if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration* declaration = (HLSLDeclaration*)statement; + + if (IsTextureType(declaration->type)) { + const char* textureName = declaration->name; + + int textureRegister = ParseRegister(declaration->registerName, nextTextureRegister); + const char* textureRegisterName = m_tree->AddStringFormat("texture(%d)", textureRegister); + + if (declaration->type.addressSpace == HLSLAddressSpace_Undefined) + declaration->type.addressSpace = HLSLAddressSpace_Device; + + AddClassArgument(new ClassArgument(textureName, declaration->type, textureRegisterName, true)); + } + else if (IsSamplerType(declaration->type)) { + const char* samplerName = declaration->name; + + int samplerRegister = ParseRegister(declaration->registerName, nextSamplerRegister); + const char* samplerRegisterName = m_tree->AddStringFormat("sampler(%d)", samplerRegister); + + if (declaration->type.addressSpace == HLSLAddressSpace_Undefined) + declaration->type.addressSpace = HLSLAddressSpace_Device; + + AddClassArgument(new ClassArgument(samplerName, 
declaration->type, samplerRegisterName, true)); + } + } + else if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer* buffer = (HLSLBuffer*)statement; + + HLSLType type(HLSLBaseType_UserDefined); + + // TODO: on cbuffer is a ubo, not tbuffer, or others + // TODO: this is having to rename due to globals + if (buffer->IsGlobalFields()) + type.typeName = m_tree->AddStringFormat("%s_ubo", buffer->name); + else + type.typeName = m_tree->AddStringFormat("%s", buffer->bufferStruct->name); + + // TODO: ConstantBuffer can use ptr notation, detect array decl + bool isRef = buffer->bufferType == HLSLBufferType_ConstantBuffer || + buffer->IsGlobalFields(); + + if (buffer->IsReadOnly()) + type.addressSpace = HLSLAddressSpace_Constant; + else + type.addressSpace = HLSLAddressSpace_Device; + + int bufferRegister = ParseRegister(buffer->registerName, nextBufferRegister) + m_options.bufferRegisterOffset; + + const char* bufferRegisterName = m_tree->AddStringFormat("buffer(%d)", bufferRegister); + + AddClassArgument(new ClassArgument(buffer->name, type, bufferRegisterName, isRef)); + } + + statement = statement->nextStatement; + } + + // @@ IC: instance_id parameter must be a function argument. If we find it inside a struct we must move it to the function arguments + // and patch all the references to it! + + // Translate semantics. + HLSLArgument* argument = entryFunction->argument; + while (argument != NULL) { + if (argument->hidden) { + argument = argument->nextArgument; + continue; + } + + if (argument->modifier == HLSLArgumentModifier_Out) { + // Translate output arguments semantics. 
+ if (argument->type.baseType == HLSLBaseType_UserDefined) { + // Our vertex input is a struct and its fields need to be tagged when we generate that + HLSLStruct* structure = tree->FindGlobalStruct(argument->type.typeName); + if (structure == NULL) { + Error("Vertex shader output struct '%s' not found in shader\n", argument->type.typeName); + } + + HLSLStructField* field = structure->field; + while (field != NULL) { + if (!field->hidden) { + field->sv_semantic = TranslateOutputSemantic(field->semantic); + } + field = field->nextField; + } + } + else { + argument->sv_semantic = TranslateOutputSemantic(argument->semantic); + } + } + else { + // Translate input arguments semantics. + if (argument->type.baseType == HLSLBaseType_UserDefined) { + // Our vertex input is a struct and its fields need to be tagged when we generate that + HLSLStruct* structure = tree->FindGlobalStruct(argument->type.typeName); + if (structure == NULL) { + Error("Vertex shader input struct '%s' not found in shader\n", argument->type.typeName); + } + + HLSLStructField* field = structure->field; + while (field != NULL) { + if (!field->hidden) { + field->sv_semantic = TranslateInputSemantic(field->semantic); + + // Force type to uint. + if (field->sv_semantic && String_Equal(field->sv_semantic, "sample_id")) { + field->type.baseType = HLSLBaseType_Uint; + field->type.flags |= HLSLTypeFlag_NoPromote; + } + + /*if (target == HLSLTarget_VertexShader && is_semantic(field->semantic, "COLOR")) + { + field->type.flags |= HLSLTypeFlag_Swizzle_BGRA; + }*/ + } + field = field->nextField; + } + } + else { + argument->sv_semantic = TranslateInputSemantic(argument->semantic); + + // Force type to uint. + if (argument->sv_semantic && String_Equal(argument->sv_semantic, "sample_id")) { + argument->type.baseType = HLSLBaseType_Uint; + argument->type.flags |= HLSLTypeFlag_NoPromote; + } + } + } + + argument = argument->nextArgument; + } + + // Translate return value semantic. 
+ if (entryFunction->returnType.baseType != HLSLBaseType_Void) { + if (entryFunction->returnType.baseType == HLSLBaseType_UserDefined) { + // Our vertex input is a struct and its fields need to be tagged when we generate that + HLSLStruct* structure = tree->FindGlobalStruct(entryFunction->returnType.typeName); + if (structure == NULL) { + Error("Vertex shader output struct '%s' not found in shader\n", entryFunction->returnType.typeName); + } + + HLSLStructField* field = structure->field; + while (field != NULL) { + if (!field->hidden) { + field->sv_semantic = TranslateOutputSemantic(field->semantic); + } + field = field->nextField; + } + } + else { + entryFunction->sv_semantic = TranslateOutputSemantic(entryFunction->semantic); + + //Error("MSL only supports COLOR semantic in return \n", entryFunction->returnType.typeName); + } + } +} + +void MSLGenerator::CleanPrepass() +{ + ClassArgument* currentArg = m_firstClassArgument; + while (currentArg != NULL) { + ClassArgument* nextArg = currentArg->nextArg; + delete currentArg; + currentArg = nextArg; + } + delete currentArg; + m_firstClassArgument = NULL; + m_lastClassArgument = NULL; +} + +void MSLGenerator::PrependDeclarations() +{ + // Any special function stubs we need go here + // That includes special constructors to emulate HLSL not being strict + + //Branch internally to HLSL vs. MSL verision + m_writer.WriteLine(0, "#include \"ShaderMSL.h\""); +} + +// Any reference or pointer must be qualified with address space in MSL +const char* MSLGenerator::GetAddressSpaceName(HLSLBaseType baseType, HLSLAddressSpace addressSpace) const +{ + if (IsSamplerType(baseType)) { + return "thread"; + } + if (IsTextureType(baseType)) { + return "thread"; + } + + // buffers also need to handle readonly (constant and const device) vs. + // readwrite (device). 
+ + switch (addressSpace) { + case HLSLAddressSpace_Constant: + return "constant"; + case HLSLAddressSpace_Device: + return "device"; + case HLSLAddressSpace_Thread: + return "thread"; + case HLSLAddressSpace_Shared: + return "shared"; + //case HLSLAddressSpace_Threadgroup: return "threadgroup_local"; + //case HLSLAddressSpace_ThreadgroupImageblock: return "threadgroup_imageblock"); + + case HLSLAddressSpace_Undefined: + break; + } + + Error("Unknown address space"); + return ""; +} + +bool MSLGenerator::Generate(HLSLTree* tree, HLSLTarget target, const char* entryName, const MSLOptions& options) +{ + m_firstClassArgument = NULL; + m_lastClassArgument = NULL; + + m_tree = tree; + m_target = target; + m_entryName = entryName; + + m_options = options; + m_writer.SetWriteFileLine(options.writeFileLine); + + m_writer.Reset(); + + // Find entry point function + HLSLFunction* entryFunction = tree->FindFunction(entryName); + if (entryFunction == NULL) { + Error("Entry point '%s' doesn't exist\n", entryName); + return false; + } + + Prepass(tree, target, entryFunction); + + PrependDeclarations(); + + HLSLRoot* root = m_tree->GetRoot(); + + OutputStaticDeclarations(0, root->statement); + + // In MSL, uniforms are parameters for the entry point, not globals: + // to limit code rewriting, we wrap the entire original shader into a class. + // Uniforms are then passed to the constructor and copied to member variables. 
+ std::string shaderClassNameStr = entryName; + shaderClassNameStr += "NS"; // to distinguish from function + + const char* shaderClassName = shaderClassNameStr.c_str(); + m_writer.WriteLine(0, "struct %s {", shaderClassName); + + OutputStatements(1, root->statement); + + // Generate constructor + m_writer.WriteLine(0, ""); + m_writer.BeginLine(1); + + m_writer.Write("%s(", shaderClassName); + + // mod + int indent = m_writer.EndLine(); + m_writer.BeginLine(indent + 1); // 1 more level for params + + const ClassArgument* currentArg = m_firstClassArgument; + while (currentArg != NULL) { + m_writer.Write("%s ", GetAddressSpaceName(currentArg->type.baseType, currentArg->type.addressSpace)); + + // ref vs. ptr + bool isRef = currentArg->isRef; + + m_writer.Write("%s %s %s", GetTypeName(currentArg->type, /*exactType=*/true), isRef ? "&" : "*", currentArg->name); + + currentArg = currentArg->nextArg; + if (currentArg) { + m_writer.Write(", "); + + // mod + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + } + } + m_writer.Write(")"); + + // mod + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + + currentArg = m_firstClassArgument; + if (currentArg) { + m_writer.Write(" : "); + } + while (currentArg != NULL) { + m_writer.Write("%s(%s)", currentArg->name, currentArg->name); + currentArg = currentArg->nextArg; + if (currentArg) { + m_writer.Write(", "); + + // mod + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + } + } + m_writer.EndLine(" {}"); + + m_writer.WriteLine(0, "};"); // Class + + // Generate real entry point, the one called by Metal + m_writer.WriteLine(0, ""); + + // If function return value has a non-color output semantic, declare a temporary struct for the output. 
+ bool wrapReturnType = false; + if (entryFunction->sv_semantic != NULL && !String_Equal(entryFunction->sv_semantic, "color(0)")) { + wrapReturnType = true; + + m_writer.WriteLine(0, "struct %s_output { %s tmp [[%s]]; };", entryName, GetTypeName(entryFunction->returnType, /*exactType=*/true), entryFunction->sv_semantic); + + m_writer.WriteLine(0, ""); + } + + m_writer.BeginLine(0); + + // @@ Add/Translate function attributes. + // entryFunction->attributes + + // TODO: hack, since don't actually parse bracket construct yet + // MSL doesn't seem to have this, set from code? + // if (m_target == HLSLTarget_ComputeShader) + // m_writer.WriteLine(indent, "[numthreads(1,1,1)]"); + + switch (m_target) { + case HLSLTarget_VertexShader: + m_writer.Write("vertex "); + break; + case HLSLTarget_PixelShader: + m_writer.Write("fragment "); + break; + case HLSLTarget_ComputeShader: + m_writer.Write("kernel "); + break; + } + + // Return type. + if (wrapReturnType) { + m_writer.Write("%s_output", entryName); + } + else { + if (entryFunction->returnType.baseType == HLSLBaseType_UserDefined) { + // Alec removing namespaced structs + // m_writer.Write("%s::", shaderClassName); + } + m_writer.Write("%s", GetTypeName(entryFunction->returnType, /*exactType=*/true)); + } + + m_writer.Write(" %s(", entryName); + + // Alec added for readability + indent = m_writer.EndLine(); + + m_writer.BeginLine(indent + 1); // indent more + + //-------------------- + // This is the class taking in arguments + + int argumentCount = 0; + HLSLArgument* argument = entryFunction->argument; + while (argument != NULL) { + if (argument->hidden) { + argument = argument->nextArgument; + continue; + } + + if (argument->type.baseType == HLSLBaseType_UserDefined) { + //TODO: aled removing namespaced structs + //m_writer.Write("%s::", shaderClassName); + } + m_writer.Write("%s %s", GetTypeName(argument->type, /*exactType=*/true), argument->name); + + // @@ IC: We are assuming that the first argument is the 
'stage_in'. + if (argument->type.baseType == HLSLBaseType_UserDefined && argument == entryFunction->argument) { + m_writer.Write(" [[stage_in]]"); + } + else if (argument->sv_semantic) { + m_writer.Write(" [[%s]]", argument->sv_semantic); + } + + argumentCount++; + + argument = argument->nextArgument; + if (argument && !argument->hidden) { + m_writer.Write(", "); + + // Alec added for readability + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + } + } + + // These are additional inputs/outputs not [[stage_in]] + + currentArg = m_firstClassArgument; + if (argumentCount && currentArg != NULL) { + m_writer.Write(","); + + // Alec added for readability + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + } + while (currentArg != NULL) { + if (currentArg->type.baseType == HLSLBaseType_UserDefined) { + bool isRef = currentArg->isRef; + + m_writer.Write("%s %s %s %s [[%s]]", GetAddressSpaceName(currentArg->type.baseType, currentArg->type.addressSpace), + // shaderClassName, + currentArg->type.typeName, isRef ? 
"&" : "*", currentArg->name, + currentArg->registerName); + } + else { + m_writer.Write("%s %s [[%s]]", GetTypeName(currentArg->type, /*exactType=*/true), currentArg->name, currentArg->registerName); + } + + currentArg = currentArg->nextArg; + if (currentArg) { + m_writer.Write(", "); + } + + // Alec added for readability + indent = m_writer.EndLine(); + m_writer.BeginLine(indent); + } + m_writer.EndLine(")"); + m_writer.WriteLine(0, "{"); + + // Create the helper class instance and call the entry point from the original shader + m_writer.BeginLine(1); + m_writer.Write("%s %s", shaderClassName, entryName); + + currentArg = m_firstClassArgument; + if (currentArg) { + m_writer.Write("("); + + while (currentArg != NULL) { + m_writer.Write("%s", currentArg->name); + currentArg = currentArg->nextArg; + if (currentArg) { + m_writer.Write(", "); + + // indent = m_writer.EndLine(); + // m_writer.BeginLine(indent); + } + } + + m_writer.Write(")"); + } + m_writer.EndLine(";"); + + m_writer.BeginLine(1); + + if (wrapReturnType) { + m_writer.Write("%s_output output; output.tmp = %s.%s(", entryName, entryName, entryName); + } + else { + m_writer.Write("return %s.%s(", entryName, entryName); + } + + argument = entryFunction->argument; + while (argument != NULL) { + if (!argument->hidden) { + m_writer.Write("%s", argument->name); + } + argument = argument->nextArgument; + if (argument && !argument->hidden) { + m_writer.Write(", "); + } + } + + m_writer.EndLine(");"); + + if (wrapReturnType) { + m_writer.WriteLine(1, "return output;"); + } + + m_writer.WriteLine(0, "}"); + + CleanPrepass(); + m_tree = NULL; + + // Any final check goes here, but shouldn't be needed as the Metal compiler is solid + + return !m_error; +} + +const char* MSLGenerator::GetResult() const +{ + return m_writer.GetResult(); +} + +void MSLGenerator::OutputStaticDeclarations(int indent, HLSLStatement* statement) +{ + while (statement != NULL) { + if (statement->hidden) { + statement = 
statement->nextStatement; + continue; + } + + // write struct/buffer outside of the namespace struct + if (statement->nodeType == HLSLNodeType_Struct) { + if (!statement->written) { + HLSLStruct* structure = static_cast(statement); + OutputStruct(indent, structure); + + // skipped for multi-entrypoint + statement->written = true; + } + } + /* These are variables, so must be in namespace + else if (statement->nodeType == HLSLNodeType_Buffer) + { + if (!statement->written) + { + HLSLBuffer* buffer = static_cast(statement); + OutputBuffer(indent, buffer); + + // skipped for multi-entrypoint + statement->written = true; + } + } + */ + + else if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration* declaration = static_cast(statement); + + const HLSLType& type = declaration->type; + + if (type.TestFlags(HLSLTypeFlag_Const | HLSLTypeFlag_Static)) { + if (!declaration->written) { + m_writer.BeginLine(indent, declaration->fileName, declaration->line); + OutputDeclaration(declaration); + m_writer.EndLine(";"); + + // skipped for multi-entrypoint + declaration->written = true; + } + + // TODO: sure this is needed, or does written handle it + // hide declaration from subsequent passes + declaration->hidden = true; + } + } + else if (statement->nodeType == HLSLNodeType_Function) { + HLSLFunction* function = static_cast(statement); + + if (!function->forward) { + OutputStaticDeclarations(indent, function->statement); + + // skipped for multi-entrypoint + //function->written = true; + } + } + + statement = statement->nextStatement; + } +} + +bool MSLGenerator::CanSkipWrittenStatement(const HLSLStatement* statement) const +{ + if (!statement->written) return false; + + // only write these once for multi-entrypoint + if (statement->nodeType == HLSLNodeType_Comment || + // statement->nodeType == HLSLNodeType_Buffer || + statement->nodeType == HLSLNodeType_Struct) + return true; + + // only write const scalars out once, so they don't conflict + if 
(statement->nodeType == HLSLNodeType_Declaration) { + const HLSLDeclaration* decl = (const HLSLDeclaration*)statement; + if (IsScalarType(decl->type.baseType) && decl->type.flags & HLSLTypeFlag_Const) { + return true; + } + } + + // TODO: all functions are currently thrown into the namespace class + // so can't yet strip them. + + // Helper functions should be skipped once written out + // if (statement->nodeType == HLSLNodeType_Function) + // { + // return true; + // } + + return false; +} + +// recursive +void MSLGenerator::OutputStatements(int indent, HLSLStatement* statement) +{ + // Main generator loop: called recursively + while (statement != NULL) { + // skip pruned statements + if (statement->hidden) { + statement = statement->nextStatement; + continue; + } + + // skip writing across multiple entry points + // skip writing some types across multiple entry points + if (CanSkipWrittenStatement(statement)) { + statement = statement->nextStatement; + continue; + } + statement->written = true; + + OutputAttributes(indent, statement->attributes); + + if (statement->nodeType == HLSLNodeType_Comment) { + HLSLComment* comment = static_cast(statement); + m_writer.WriteLine(indent, "//%s", comment->text); + } + else if (statement->nodeType == HLSLNodeType_Declaration) { + HLSLDeclaration* declaration = static_cast(statement); + + if (declaration->assignment && declaration->assignment->nodeType == HLSLNodeType_FunctionCall) { + OutputFunctionCallStatement(indent, (HLSLFunctionCall*)declaration->assignment, declaration); + } + else { + m_writer.BeginLine(indent, declaration->fileName, declaration->line); + OutputDeclaration(declaration); + m_writer.EndLine(";"); + } + } + else if (statement->nodeType == HLSLNodeType_Struct) { + HLSLStruct* structure = static_cast(statement); + OutputStruct(indent, structure); + } + else if (statement->nodeType == HLSLNodeType_Buffer) { + HLSLBuffer* buffer = static_cast(statement); + OutputBuffer(indent, buffer); + } + else if 
(statement->nodeType == HLSLNodeType_Function) { + HLSLFunction* function = static_cast(statement); + + if (!function->forward) { + OutputFunction(indent, function); + } + } + else if (statement->nodeType == HLSLNodeType_ExpressionStatement) { + HLSLExpressionStatement* expressionStatement = static_cast(statement); + HLSLExpression* expression = expressionStatement->expression; + + if (expression->nodeType == HLSLNodeType_FunctionCall) { + OutputFunctionCallStatement(indent, (HLSLFunctionCall*)expression, NULL); + } + else { + m_writer.BeginLine(indent, statement->fileName, statement->line); + OutputExpression(expressionStatement->expression, NULL); + m_writer.EndLine(";"); + } + } + else if (statement->nodeType == HLSLNodeType_ReturnStatement) { + HLSLReturnStatement* returnStatement = static_cast(statement); + if (m_currentFunction->numOutputArguments > 0) { + m_writer.BeginLine(indent, returnStatement->fileName, returnStatement->line); + m_writer.Write("return { "); + + int numArguments = 0; + if (returnStatement->expression != NULL) { + OutputTypedExpression(m_currentFunction->returnType, returnStatement->expression, NULL); + numArguments++; + } + + HLSLArgument* argument = m_currentFunction->argument; + while (argument != NULL) { + if (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout) { + if (numArguments) m_writer.Write(", "); + m_writer.Write("%s", argument->name); + numArguments++; + } + argument = argument->nextArgument; + } + + m_writer.EndLine(" };"); + } + else if (returnStatement->expression != NULL) { + m_writer.BeginLine(indent, returnStatement->fileName, returnStatement->line); + m_writer.Write("return "); + OutputTypedExpression(m_currentFunction->returnType, returnStatement->expression, NULL); + m_writer.EndLine(";"); + } + else { + m_writer.WriteLineTagged(indent, returnStatement->fileName, returnStatement->line, "return;"); + } + } + else if (statement->nodeType == HLSLNodeType_DiscardStatement) 
{ + HLSLDiscardStatement* discardStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, discardStatement->fileName, discardStatement->line, "discard_fragment();"); + } + else if (statement->nodeType == HLSLNodeType_BreakStatement) { + HLSLBreakStatement* breakStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, breakStatement->fileName, breakStatement->line, "break;"); + } + else if (statement->nodeType == HLSLNodeType_ContinueStatement) { + HLSLContinueStatement* continueStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, continueStatement->fileName, continueStatement->line, "continue;"); + } + else if (statement->nodeType == HLSLNodeType_IfStatement) { + HLSLIfStatement* ifStatement = static_cast(statement); + + if (ifStatement->isStatic) { + int value; + if (!m_tree->GetExpressionValue(ifStatement->condition, value)) { + Error("@if condition could not be evaluated.\n"); + } + if (value != 0) { + OutputStatements(indent + 1, ifStatement->statement); + } + else if (ifStatement->elseStatement != NULL) { + OutputStatements(indent + 1, ifStatement->elseStatement); + } + } + else { + m_writer.BeginLine(indent, ifStatement->fileName, ifStatement->line); + m_writer.Write("if ("); + OutputExpression(ifStatement->condition, NULL); + m_writer.Write(") {"); + m_writer.EndLine(); + OutputStatements(indent + 1, ifStatement->statement); + m_writer.WriteLine(indent, "}"); + if (ifStatement->elseStatement != NULL) { + m_writer.WriteLine(indent, "else {"); + OutputStatements(indent + 1, ifStatement->elseStatement); + m_writer.WriteLine(indent, "}"); + } + } + } + else if (statement->nodeType == HLSLNodeType_ForStatement) { + HLSLForStatement* forStatement = static_cast(statement); + m_writer.BeginLine(indent, forStatement->fileName, forStatement->line); + m_writer.Write("for ("); + OutputDeclaration(forStatement->initialization); + m_writer.Write("; "); + OutputExpression(forStatement->condition, NULL); + m_writer.Write("; "); 
+ OutputExpression(forStatement->increment, NULL); + m_writer.Write(") {"); + m_writer.EndLine(); + OutputStatements(indent + 1, forStatement->statement); + m_writer.WriteLine(indent, "}"); + } + else if (statement->nodeType == HLSLNodeType_BlockStatement) { + HLSLBlockStatement* blockStatement = static_cast(statement); + m_writer.WriteLineTagged(indent, blockStatement->fileName, blockStatement->line, "{"); + OutputStatements(indent + 1, blockStatement->statement); + m_writer.WriteLine(indent, "}"); + } + + // fx file support for Technique/Pipeline + // else if (statement->nodeType == HLSLNodeType_Technique) + // { + // // Techniques are ignored. + // } + // else if (statement->nodeType == HLSLNodeType_Pipeline) + // { + // // Pipelines are ignored. + // } + else { + // Unhandled statement type. + Error("Unknown statement"); + } + + statement = statement->nextStatement; + } +} + +// Called by OutputStatements +void MSLGenerator::OutputAttributes(int indent, HLSLAttribute* attribute) +{ + // IC: These do not appear to exist in MSL. + while (attribute != NULL) { + if (attribute->attributeType == HLSLAttributeType_Unroll) { + // @@ Do any of these work? + //m_writer.WriteLine(indent, attribute->fileName, attribute->line, "#pragma unroll"); + //m_writer.WriteLine(indent, attribute->fileName, attribute->line, "[[unroll]]"); + } + else if (attribute->attributeType == HLSLAttributeType_Flatten) { + // @@ + } + else if (attribute->attributeType == HLSLAttributeType_Branch) { + // @@, [[likely]]? 
+ } + + attribute = attribute->nextAttribute; + } +} + +void MSLGenerator::OutputDeclaration(HLSLDeclaration* declaration) +{ + if (IsSamplerType(declaration->type)) { + m_writer.Write("%s sampler& %s", GetAddressSpaceName(declaration->type.baseType, declaration->type.addressSpace), declaration->name); + } + else if (IsTextureType(declaration->type)) { + const char* textureName = GetTypeName(declaration->type, true); + if (textureName) + m_writer.Write("%s %s& %s", GetAddressSpaceName(declaration->type.baseType, declaration->type.addressSpace), textureName, declaration->name); + else + Error("Unknown texture"); + } + else { + OutputDeclaration(declaration->type, declaration->name, declaration->assignment); + + declaration = declaration->nextDeclaration; + while (declaration != NULL) { + m_writer.Write(","); + OutputDeclarationBody(declaration->type, declaration->name, declaration->assignment); + declaration = declaration->nextDeclaration; + } + } +} + +void MSLGenerator::OutputStruct(int indent, HLSLStruct* structure) +{ + m_writer.WriteLineTagged(indent, structure->fileName, structure->line, "struct %s {", structure->name); + HLSLStructField* field = structure->field; + while (field != NULL) { + if (!field->hidden) { + m_writer.BeginLine(indent + 1, field->fileName, field->line); + OutputDeclaration(field->type, field->name, NULL); + + // DONE: would need a semantic remap for all possible semantics + // just use the name the caller specified if sv_semantic + // is not set. 
The header can handle translating + if (field->sv_semantic) { + m_writer.Write(" [[%s]]", field->sv_semantic); + } + + m_writer.EndLine(";"); + } + field = field->nextField; + } + m_writer.WriteLine(indent, "};"); +} + +void MSLGenerator::OutputBuffer(int indent, HLSLBuffer* buffer) +{ + if (!buffer->IsGlobalFields()) { + m_writer.BeginLine(indent, buffer->fileName, buffer->line); + + // TODO: handle array count for indexing into constant buffer + // some are unbounded array like BAB and SBO + // TODO: may need to use t/u registers for those too and a thread? + + // TODO: fix this, ConstantBuffer can index into a constant buffer too + // detect use of array notation on decl + bool isRef = buffer->bufferType == HLSLBufferType_ConstantBuffer || + buffer->IsGlobalFields(); + + if (buffer->bufferType == HLSLBufferType_ConstantBuffer || + buffer->bufferType == HLSLBufferType_ByteAddressBuffer || + buffer->bufferType == HLSLBufferType_StructuredBuffer) { + m_writer.Write("constant %s %s %s", buffer->bufferStruct->name, isRef ? "&" : "*", buffer->name); + } + else { + m_writer.Write("device %s %s %s", buffer->bufferStruct->name, isRef ? 
"&" : "*", buffer->name); + } + + m_writer.EndLine(";"); + } + else { + // converted cbuffer that spill tons of globals for every field + HLSLDeclaration* field = buffer->field; + + m_writer.BeginLine(indent, buffer->fileName, buffer->line); + m_writer.Write("struct %s_ubo", buffer->name); + m_writer.EndLine(" {"); + + while (field != NULL) { + if (!field->hidden) { + m_writer.BeginLine(indent + 1, field->fileName, field->line); + OutputDeclaration(field->type, field->name, field->assignment, false, false, 0); // /*alignment=*/16); + m_writer.EndLine(";"); + } + field = (HLSLDeclaration*)field->nextStatement; + } + m_writer.WriteLine(indent, "};"); + + m_writer.WriteLine(indent, "constant %s_ubo & %s;", buffer->name, buffer->name); + } +} + +void MSLGenerator::OutputFunction(int indent, HLSLFunction* function) +{ + const char* functionName = function->name; + const char* returnTypeName = GetTypeName(function->returnType, /*exactType=*/false); + + // Declare output tuple. + if (function->numOutputArguments > 0) { + returnTypeName = m_tree->AddStringFormat("%s_out%d", functionName, function->line); // @@ Find a better way to generate unique name. 
+ + m_writer.BeginLine(indent, function->fileName, function->line); + m_writer.Write("struct %s { ", returnTypeName); + m_writer.EndLine(); + + if (function->returnType.baseType != HLSLBaseType_Void) { + m_writer.BeginLine(indent + 1, function->fileName, function->line); + OutputDeclaration(function->returnType, "__result", /*defaultValue=*/NULL, /*isRef=*/false, /*isConst=*/false); + m_writer.EndLine(";"); + } + + HLSLArgument* argument = function->argument; + while (argument != NULL) { + if (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout) { + m_writer.BeginLine(indent + 1, function->fileName, function->line); + OutputDeclaration(argument->type, argument->name, /*defaultValue=*/NULL, /*isRef=*/false, /*isConst=*/false); + m_writer.EndLine(";"); + } + argument = argument->nextArgument; + } + + m_writer.WriteLine(indent, "};"); + + // Create unique function name to avoid collision with overloads and different return types. + m_writer.BeginLine(indent, function->fileName, function->line); + m_writer.Write("%s %s_%d(", returnTypeName, functionName, function->line); + } + else { + m_writer.BeginLine(indent, function->fileName, function->line); + m_writer.Write("%s %s(", returnTypeName, functionName); + } + + OutputArguments(function->argument); + + m_writer.EndLine(") {"); + m_currentFunction = function; + + // Local declarations for output arguments. + HLSLArgument* argument = function->argument; + while (argument != NULL) { + if (argument->modifier == HLSLArgumentModifier_Out) { + m_writer.BeginLine(indent + 1, function->fileName, function->line); + OutputDeclaration(argument->type, argument->name, /*defaultValue=*/NULL, /*isRef=*/false, /*isConst=*/false); + m_writer.EndLine(";"); + } + argument = argument->nextArgument; + } + + OutputStatements(indent + 1, function->statement); // @@ Modify return statements if function has multiple output arguments! + + // Output implicit return. 
+ if (function->numOutputArguments > 0) { + bool needsImplicitReturn = true; + HLSLStatement* statement = function->statement; + if (statement != NULL) { + while (statement->nextStatement != NULL) { + statement = statement->nextStatement; + } + needsImplicitReturn = (statement->nodeType != HLSLNodeType_ReturnStatement) && function->returnType.baseType == HLSLBaseType_Void; + } + + if (needsImplicitReturn) { + m_writer.BeginLine(indent + 1); + m_writer.Write("return { "); + + int numArguments = 0; + HLSLArgument* argument2 = m_currentFunction->argument; + while (argument2 != NULL) { + if (argument2->modifier == HLSLArgumentModifier_Out || + argument2->modifier == HLSLArgumentModifier_Inout) { + if (numArguments) m_writer.Write(", "); + m_writer.Write("%s ", argument2->name); + numArguments++; + } + argument2 = argument2->nextArgument; + } + + m_writer.EndLine(" };"); + } + } + + m_writer.WriteLine(indent, "};"); + m_currentFunction = NULL; +} + +// @@ We could be a lot smarter removing parenthesis based on the operator precedence of the parent expression. +static bool NeedsParenthesis(HLSLExpression* expression, HLSLExpression* parentExpression) +{ + // For now we just omit the parenthesis if there's no parent expression. + if (parentExpression == NULL) { + return false; + } + + // One more special case that's pretty common. + if (parentExpression->nodeType == HLSLNodeType_MemberAccess) { + if (expression->nodeType == HLSLNodeType_IdentifierExpression || + expression->nodeType == HLSLNodeType_ArrayAccess || + expression->nodeType == HLSLNodeType_MemberAccess) { + return false; + } + } + + return true; +} + +bool MSLGenerator::NeedsCast(const HLSLType& target, const HLSLType& source) +{ + HLSLBaseType targetType = target.baseType; + HLSLBaseType sourceType = source.baseType; + + if (sourceType == HLSLBaseType_Int) { + // int k = 1; + } + + /*if (IsScalarType(target)) + { + // Scalar types do not need casting. 
+ return false; + }*/ + + if (m_options.treatHalfAsFloat) { + // use call to convert half back to float type + if (IsHalf(targetType)) targetType = HalfToFloatBaseType(targetType); + if (IsHalf(sourceType)) sourceType = HalfToFloatBaseType(sourceType); + } + + return targetType != sourceType && (IsCoreTypeEqual(targetType, sourceType) || IsScalarType(sourceType)); +} + +void MSLGenerator::OutputTypedExpression(const HLSLType& type, HLSLExpression* expression, HLSLExpression* parentExpression) +{ + // If base types are not exactly the same, do explicit cast. + bool closeCastExpression = false; + if (NeedsCast(type, expression->expressionType)) { + OutputCast(type); + m_writer.Write("("); + closeCastExpression = true; + } + + OutputExpression(expression, parentExpression); + + if (closeCastExpression) { + m_writer.Write(")"); + } +} + +void MSLGenerator::OutputExpression(HLSLExpression* expression, HLSLExpression* parentExpression) +{ + if (expression->nodeType == HLSLNodeType_IdentifierExpression) { + HLSLIdentifierExpression* identifierExpression = static_cast(expression); + const char* name = identifierExpression->name; + + { + if (identifierExpression->global) { + // prepend cbuffer name + HLSLBuffer* buffer; + HLSLDeclaration* declaration = m_tree->FindGlobalDeclaration(identifierExpression->name, &buffer); + + if (declaration && declaration->buffer) { + ASSERT(buffer == declaration->buffer); + m_writer.Write("%s.", declaration->buffer->name); + } + } + m_writer.Write("%s", name); + + // IC: Add swizzle if this is a member access of a field that has the swizzle flag. 
+ /*if (parentExpression->nodeType == HLSLNodeType_MemberAccess) + { + HLSLMemberAccess* memberAccess = (HLSLMemberAccess*)parentExpression; + const HLSLType & objectType = memberAccess->object->expressionType; + const HLSLStruct* structure = m_tree->FindGlobalStruct(objectType.typeName); + if (structure != NULL) + { + const HLSLStructField* field = structure->field; + while (field != NULL) + { + if (field->name == name) + { + if (field->type.flags & HLSLTypeFlag_Swizzle_BGRA) + { + m_writer.Write(".bgra", name); + } + } + } + } + }*/ + } + } + else if (expression->nodeType == HLSLNodeType_CastingExpression) { + HLSLCastingExpression* castingExpression = static_cast(expression); + OutputCast(castingExpression->type); + m_writer.Write("("); + OutputExpression(castingExpression->expression, castingExpression); + m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_ConstructorExpression) { + HLSLConstructorExpression* constructorExpression = static_cast(expression); + + m_writer.Write("%s(", GetTypeName(constructorExpression->type, /*exactType=*/false)); + //OutputExpressionList(constructorExpression->type, constructorExpression->argument); // @@ Get element type. + OutputExpressionList(constructorExpression->argument); + m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_LiteralExpression) { + HLSLLiteralExpression* literalExpression = static_cast(expression); + + HLSLBaseType type = literalExpression->type; + if (m_options.treatHalfAsFloat && IsHalf(type)) + type = HLSLBaseType_Float; + + switch (type) { + case HLSLBaseType_Half: + case HLSLBaseType_Double: + case HLSLBaseType_Float: { + char floatBuffer[64]; + + String_FormatFloat(floatBuffer, sizeof(floatBuffer), literalExpression->fValue); + String_StripTrailingFloatZeroes(floatBuffer); + m_writer.Write("%s%s", floatBuffer, type == HLSLBaseType_Half ? "h" : ""); + break; + } + // TODO: missing uint types (trailing character u, ul, ..) 
+ + case HLSLBaseType_Short: + case HLSLBaseType_Long: + case HLSLBaseType_Int: + m_writer.Write("%d", literalExpression->iValue); + break; + + case HLSLBaseType_Bool: + m_writer.Write("%s", literalExpression->bValue ? "true" : "false"); + break; + default: + Error("Unhandled literal"); + //ASSERT(0); + } + } + else if (expression->nodeType == HLSLNodeType_UnaryExpression) { + HLSLUnaryExpression* unaryExpression = static_cast(expression); + const char* op = "?"; + bool pre = true; + switch (unaryExpression->unaryOp) { + case HLSLUnaryOp_Negative: + op = "-"; + break; + case HLSLUnaryOp_Positive: + op = "+"; + break; + case HLSLUnaryOp_Not: + op = "!"; + break; + case HLSLUnaryOp_BitNot: + op = "~"; + break; + case HLSLUnaryOp_PreIncrement: + op = "++"; + break; + case HLSLUnaryOp_PreDecrement: + op = "--"; + break; + case HLSLUnaryOp_PostIncrement: + op = "++"; + pre = false; + break; + case HLSLUnaryOp_PostDecrement: + op = "--"; + pre = false; + break; + } + bool addParenthesis = NeedsParenthesis(unaryExpression->expression, expression); + if (addParenthesis) m_writer.Write("("); + if (pre) { + m_writer.Write("%s", op); + OutputExpression(unaryExpression->expression, unaryExpression); + } + else { + OutputExpression(unaryExpression->expression, unaryExpression); + m_writer.Write("%s", op); + } + if (addParenthesis) m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_BinaryExpression) { + HLSLBinaryExpression* binaryExpression = static_cast(expression); + + bool addParenthesis = NeedsParenthesis(expression, parentExpression); + if (addParenthesis) m_writer.Write("("); + + { + if (IsArithmeticOp(binaryExpression->binaryOp) || IsLogicOp(binaryExpression->binaryOp)) { + // Do intermediate type promotion, without changing dimension: + HLSLType promotedType = binaryExpression->expression1->expressionType; + + if (!IsNumericTypeEqual(binaryExpression->expressionType.baseType, promotedType.baseType)) { + promotedType.baseType = 
PromoteType(binaryExpression->expressionType.baseType, promotedType.baseType); + } + + OutputTypedExpression(promotedType, binaryExpression->expression1, binaryExpression); + } + else { + OutputExpression(binaryExpression->expression1, binaryExpression); + } + + const char* op = "?"; + switch (binaryExpression->binaryOp) { + case HLSLBinaryOp_Add: + op = " + "; + break; + case HLSLBinaryOp_Sub: + op = " - "; + break; + case HLSLBinaryOp_Mul: + op = " * "; + break; + case HLSLBinaryOp_Div: + op = " / "; + break; + case HLSLBinaryOp_Less: + op = " < "; + break; + case HLSLBinaryOp_Greater: + op = " > "; + break; + case HLSLBinaryOp_LessEqual: + op = " <= "; + break; + case HLSLBinaryOp_GreaterEqual: + op = " >= "; + break; + case HLSLBinaryOp_Equal: + op = " == "; + break; + case HLSLBinaryOp_NotEqual: + op = " != "; + break; + case HLSLBinaryOp_Assign: + op = " = "; + break; + case HLSLBinaryOp_AddAssign: + op = " += "; + break; + case HLSLBinaryOp_SubAssign: + op = " -= "; + break; + case HLSLBinaryOp_MulAssign: + op = " *= "; + break; + case HLSLBinaryOp_DivAssign: + op = " /= "; + break; + case HLSLBinaryOp_And: + op = " && "; + break; + case HLSLBinaryOp_Or: + op = " || "; + break; + case HLSLBinaryOp_BitAnd: + op = " & "; + break; + case HLSLBinaryOp_BitOr: + op = " | "; + break; + case HLSLBinaryOp_BitXor: + op = " ^ "; + break; + default: + Error("unhandled literal"); + //ASSERT(0); + } + m_writer.Write("%s", op); + + if (binaryExpression->binaryOp == HLSLBinaryOp_MulAssign || + binaryExpression->binaryOp == HLSLBinaryOp_DivAssign || + IsArithmeticOp(binaryExpression->binaryOp) || + IsLogicOp(binaryExpression->binaryOp)) { + // Do intermediate type promotion, without changing dimension: + HLSLType promotedType = binaryExpression->expression2->expressionType; + + if (!IsNumericTypeEqual(binaryExpression->expressionType.baseType, promotedType.baseType)) { + // This should only promote up (half->float, etc) + promotedType.baseType = 
PromoteType(binaryExpression->expressionType.baseType, promotedType.baseType); + } + + OutputTypedExpression(promotedType, binaryExpression->expression2, binaryExpression); + } + else if (IsAssignOp(binaryExpression->binaryOp)) { + OutputTypedExpression(binaryExpression->expressionType, binaryExpression->expression2, binaryExpression); + } + else { + OutputExpression(binaryExpression->expression2, binaryExpression); + } + } + if (addParenthesis) m_writer.Write(")"); + } + else if (expression->nodeType == HLSLNodeType_ConditionalExpression) { + HLSLConditionalExpression* conditionalExpression = static_cast(expression); + + // TODO: @@ Remove parenthesis. + m_writer.Write("(("); + OutputExpression(conditionalExpression->condition, NULL); + m_writer.Write(")?("); + OutputExpression(conditionalExpression->trueExpression, NULL); + m_writer.Write("):("); + OutputExpression(conditionalExpression->falseExpression, NULL); + m_writer.Write("))"); + } + else if (expression->nodeType == HLSLNodeType_MemberAccess) { + HLSLMemberAccess* memberAccess = static_cast(expression); + bool addParenthesis = NeedsParenthesis(memberAccess->object, expression); + + if (addParenthesis) { + m_writer.Write("("); + } + OutputExpression(memberAccess->object, NULL); + if (addParenthesis) { + m_writer.Write(")"); + } + + m_writer.Write(".%s", memberAccess->field); + } + else if (expression->nodeType == HLSLNodeType_ArrayAccess) { + HLSLArrayAccess* arrayAccess = static_cast(expression); + + // Just use the matrix notation, using column_order instead of row_order + //if (arrayAccess->array->expressionType.array) // || !IsMatrixType(arrayAccess->array->expressionType.baseType)) + { + OutputExpression(arrayAccess->array, expression); + m_writer.Write("["); + OutputExpression(arrayAccess->index, NULL); + m_writer.Write("]"); + } + // else + // { + // // @@ This doesn't work for l-values! 
+ // m_writer.Write("column("); + // OutputExpression(arrayAccess->array, NULL); + // m_writer.Write(", "); + // OutputExpression(arrayAccess->index, NULL); + // m_writer.Write(")"); + // } + } + else if (expression->nodeType == HLSLNodeType_FunctionCall) { + HLSLFunctionCall* functionCall = static_cast(expression); + OutputFunctionCall(functionCall, parentExpression); + } + else if (expression->nodeType == HLSLNodeType_MemberFunctionCall) { + HLSLMemberFunctionCall* functionCall = static_cast(expression); + + // Write out the member identifier + m_writer.Write("%s.", functionCall->memberIdentifier->name); + + OutputFunctionCall(functionCall, parentExpression); + } + else { + Error("unknown expression"); + } +} + +void MSLGenerator::OutputCast(const HLSLType& type) +{ + // Note: msl fails on float4x4 to float3x3 casting + if (type.baseType == HLSLBaseType_Float3x3) { + m_writer.Write("tofloat3x3"); + } + else if (type.baseType == HLSLBaseType_Half3x3) { + m_writer.Write("tohalft3x3"); + } + else { + m_writer.Write("("); + OutputDeclarationType(type, /*isConst=*/false, /*isRef=*/false, /*alignment=*/0, /*isTypeCast=*/true); + m_writer.Write(")"); + } +} + +// Called by the various Output functions +void MSLGenerator::OutputArguments(HLSLArgument* argument) +{ + int numArgs = 0; + while (argument != NULL) { + // Skip hidden and output arguments. 
// Writes a complete declaration: the type part (qualifiers, alignment, type name)
// followed by the body part (name, array brackets, optional initializer).
//   assignment - initializer expression, forwarded to OutputDeclarationBody
//   isRef / isConst / alignment - forwarded unchanged to OutputDeclarationType
void MSLGenerator::OutputDeclaration(const HLSLType& type, const char* name, HLSLExpression* assignment, bool isRef, bool isConst, int alignment)
{
    OutputDeclarationType(type, isRef, isConst, alignment);
    OutputDeclarationBody(type, name, assignment, isRef);
}
+ if (type.TestFlags(HLSLTypeFlag_NoInterpolation)) { + m_writer.Write(" [[flat]]"); + } + else { + if (type.TestFlags(HLSLTypeFlag_NoPerspective)) { + if (type.TestFlags(HLSLTypeFlag_Centroid)) { + m_writer.Write(" [[centroid_no_perspective]]"); + } + else if (type.TestFlags(HLSLTypeFlag_Sample)) { + m_writer.Write(" [[sample_no_perspective]]"); + } + else { + m_writer.Write(" [[center_no_perspective]]"); + } + } + else { + if (type.TestFlags(HLSLTypeFlag_Centroid)) { + m_writer.Write(" [[centroid_perspective]]"); + } + else if (type.TestFlags(HLSLTypeFlag_Sample)) { + m_writer.Write(" [[sample_perspective]]"); + } + else { + // Default. + //m_writer.Write(" [[center_perspective]]"); + } + } + } +} + +void MSLGenerator::OutputDeclarationBody(const HLSLType& type, const char* name, HLSLExpression* assignment, bool isRef) +{ + if (isRef) { + // Arrays of refs are illegal in C++ and hence MSL, need to "link" the & to the var name + m_writer.Write("(&"); + } + + // Then name + m_writer.Write(" %s", name); + + if (isRef) { + m_writer.Write(")"); + } + + // Add brackets for arrays + if (type.array) { + m_writer.Write("["); + if (type.arraySize != NULL) { + OutputExpression(type.arraySize, NULL); + } + m_writer.Write("]"); + } + + // Semantics and registers unhandled for now + + // Assignment handling + if (assignment != NULL) { + m_writer.Write(" = "); + if (type.array) { + m_writer.Write("{ "); + OutputExpressionList(assignment); + m_writer.Write(" }"); + } + else { + OutputTypedExpression(type, assignment, NULL); + } + } +} + +void MSLGenerator::OutputExpressionList(HLSLExpression* expression) +{ + int numExpressions = 0; + while (expression != NULL) { + if (numExpressions > 0) { + m_writer.Write(", "); + } + OutputExpression(expression, NULL); + expression = expression->nextExpression; + ++numExpressions; + } +} + +// Cast all expressions to given type. 
+void MSLGenerator::OutputExpressionList(const HLSLType& type, HLSLExpression* expression) +{ + int numExpressions = 0; + while (expression != NULL) { + if (numExpressions > 0) { + m_writer.Write(", "); + } + + OutputTypedExpression(type, expression, NULL); + expression = expression->nextExpression; + ++numExpressions; + } +} + +// Cast each expression to corresponding argument type. +void MSLGenerator::OutputExpressionList(HLSLArgument* argument, HLSLExpression* expression) +{ + int numExpressions = 0; + while (expression != NULL) { + ASSERT(argument != NULL); + if (argument->modifier != HLSLArgumentModifier_Out) { + if (numExpressions > 0) { + m_writer.Write(", "); + } + + OutputTypedExpression(argument->type, expression, NULL); + ++numExpressions; + } + + expression = expression->nextExpression; + argument = argument->nextArgument; + } +} + +inline bool isAddressable(HLSLExpression* expression) +{ + if (expression->nodeType == HLSLNodeType_IdentifierExpression) { + return true; + } + if (expression->nodeType == HLSLNodeType_ArrayAccess) { + return true; + } + if (expression->nodeType == HLSLNodeType_MemberAccess) { + HLSLMemberAccess* memberAccess = (HLSLMemberAccess*)expression; + return !memberAccess->swizzle; + } + return false; +} + +void MSLGenerator::OutputFunctionCallStatement(int indent, HLSLFunctionCall* functionCall, HLSLDeclaration* declaration) +{ + // Nothing special about these cases: + if (functionCall->function->numOutputArguments == 0) { + m_writer.BeginLine(indent, functionCall->fileName, functionCall->line); + if (declaration) { + OutputDeclaration(declaration); + } + else { + OutputExpression(functionCall, NULL); + } + m_writer.EndLine(";"); + return; + } + + // Transform this: + // float foo = functionCall(bah, poo); + + // Into: + // auto tmp = functionCall(bah, poo); + // bah = tmp.bah; + // poo = tmp.poo; + // float foo = tmp.__result; + + const char* functionName = functionCall->function->name; + + m_writer.BeginLine(indent, 
functionCall->fileName, functionCall->line); + m_writer.Write("auto out%d = %s_%d(", functionCall->line, functionName, functionCall->function->line); + OutputExpressionList(functionCall->function->argument, functionCall->argument); + m_writer.EndLine(");"); + + HLSLExpression* expression = functionCall->argument; + HLSLArgument* argument = functionCall->function->argument; + while (argument != NULL) { + if (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout) { + m_writer.BeginLine(indent); + OutputExpression(expression, NULL); + // @@ This assignment may need a cast. + m_writer.Write(" = "); + if (NeedsCast(expression->expressionType, argument->type)) { + m_writer.Write("(%s)", GetTypeName(expression->expressionType, true)); + } + m_writer.Write("out%d.%s;", functionCall->line, argument->name); + m_writer.EndLine(); + } + + expression = expression->nextExpression; + argument = argument->nextArgument; + } + + if (declaration) { + m_writer.BeginLine(indent); + OutputDeclarationType(declaration->type); + m_writer.Write(" %s = out%d.__result;", declaration->name, functionCall->line); + m_writer.EndLine(); + } + + /* TODO: Alec, why is all this chopped out? 
+ + int argumentIndex = 0; + HLSLArgument* argument = functionCall->function->argument; + HLSLExpression* expression = functionCall->argument; + while (argument != NULL) + { + if (!isAddressable(expression)) + { + if (argument->modifier == HLSLArgumentModifier_Out) + { + m_writer.BeginLine(indent, functionCall->fileName, functionCall->line); + OutputDeclarationType(argument->type); + m_writer.Write("tmp%d;", argumentIndex); + m_writer.EndLine(); + } + else if (argument->modifier == HLSLArgumentModifier_Inout) + { + m_writer.BeginLine(indent, functionCall->fileName, functionCall->line); + OutputDeclarationType(argument->type); + m_writer.Write("tmp%d = ", argumentIndex); + OutputExpression(expression, NULL); + m_writer.EndLine(";"); + } + } + argument = argument->nextArgument; + expression = expression->nextExpression; + argumentIndex++; + } + + m_writer.BeginLine(indent, functionCall->fileName, functionCall->line); + const char* name = functionCall->function->name; + m_writer.Write("%s(", name); + //OutputExpressionList(functionCall->argument); + + // Output expression list with temporary substitution. 
+ argumentIndex = 0; + argument = functionCall->function->argument; + expression = functionCall->argument; + while (expression != NULL) + { + if (!isAddressable(expression) && (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout)) + { + m_writer.Write("tmp%d", argumentIndex); + } + else + { + OutputExpression(expression, NULL); + } + + argument = argument->nextArgument; + expression = expression->nextExpression; + argumentIndex++; + if (expression) + { + m_writer.Write(", "); + } + } + m_writer.EndLine(");"); + + argumentIndex = 0; + argument = functionCall->function->argument; + expression = functionCall->argument; + while (expression != NULL) + { + if (!isAddressable(expression) && (argument->modifier == HLSLArgumentModifier_Out || argument->modifier == HLSLArgumentModifier_Inout)) + { + m_writer.BeginLine(indent, functionCall->fileName, functionCall->line); + OutputExpression(expression, NULL); + m_writer.Write(" = tmp%d", argumentIndex); + m_writer.EndLine(";"); + } + + argument = argument->nextArgument; + expression = expression->nextExpression; + argumentIndex++; + } + */ +} + +void MSLGenerator::OutputFunctionCall(HLSLFunctionCall* functionCall, HLSLExpression* parentExpression) +{ + if (functionCall->function->numOutputArguments > 0) { + ASSERT(false); + } + + const char* functionName = functionCall->function->name; + + // If function begins with tex, then it returns float4 or half4 depending on options.halfTextureSamplers + /*if (strncmp(functionName, "tex", 3) == 0) + { + if (parentExpression && IsFloat(parentExpression->expressionType.baseType)) + { + if (m_options.halfTextureSamplers) + { + OutputCast(functionCall->expressionType); + } + } + }*/ + + { + m_writer.Write("%s(", functionName); + OutputExpressionList(functionCall->function->argument, functionCall->argument); + //OutputExpressionList(functionCall->argument); + m_writer.Write(")"); + } +} + +const char* MSLGenerator::TranslateInputSemantic(const 
char* semantic) +{ + if (semantic == NULL) + return NULL; + + uint32_t length, index; + ParseSemantic(semantic, &length, &index); + + if (m_target == HLSLTarget_VertexShader) { + // These are DX10 convention + if (String_Equal(semantic, "SV_InstanceID")) + return "instance_id"; + if (String_Equal(semantic, "SV_VertexID")) + return "vertex_id"; + + // requires SPV_KHR_shader_draw_parameters for Vulkan + // not a DX12 construct. + if (String_Equal(semantic, "BASEVERTEX")) + return "base_vertex"; + if (String_Equal(semantic, "BASEINSTANCE")) + return "base_instance"; + //if (String_Equal(semantic, "DRAW_INDEX")) + // return "draw_index"; + + // TODO: primitive_id, barycentric + + // Handle attributes + + // Can set custom attributes via a callback + if (m_options.attributeCallback) { + char name[64]; + ASSERT(length < sizeof(name)); + + strncpy(name, semantic, length); + name[length] = 0; + + int attribute = m_options.attributeCallback(name, index); + + if (attribute >= 0) { + return m_tree->AddStringFormat("attribute(%d)", attribute); + } + } + + if (String_Equal(semantic, "SV_Position")) + return "attribute(POSITION)"; + + return m_tree->AddStringFormat("attribute(%s)", semantic); + } + else if (m_target == HLSLTarget_PixelShader) { + // PS inputs + + if (String_Equal(semantic, "SV_Position")) + return "position"; + + // if (String_Equal(semantic, "POSITION")) + // return "position"; + if (String_Equal(semantic, "SV_IsFrontFace")) + return "front_facing"; + + // VS sets what layer to render into, ps can look at it. + // Gpu Family 5. + if (String_Equal(semantic, "SV_RenderTargetArrayIndex")) + return "render_target_array_index"; + + // dual source? 
passes in underlying color + if (String_Equal(semantic, "DST_COLOR")) + return "color(0)"; + + if (String_Equal(semantic, "SV_SampleIndex")) + return "sample_id"; + //if (String_Equal(semantic, "SV_Coverage")) return "sample_mask"; + //if (String_Equal(semantic, "SV_Coverage")) return "sample_mask,post_depth_coverage"; + } + else if (m_target == HLSLTarget_ComputeShader) { + // compute inputs + if (String_Equal(semantic, "SV_DispatchThreadID")) + return "thread_position_in_grid"; + } + return NULL; +} + +const char* MSLGenerator::TranslateOutputSemantic(const char* semantic) +{ + if (semantic == NULL) + return NULL; + + uint32_t length, index; + ParseSemantic(semantic, &length, &index); + + if (m_target == HLSLTarget_VertexShader) { + if (String_Equal(semantic, "SV_Position")) + return "position"; + + // PSIZE is non-square in DX9, and square in DX10 (and MSL) + // https://github.com/KhronosGroup/glslang/issues/1154 + if (String_Equal(semantic, "PSIZE")) + return "point_size"; + + // control layer in Gpu Family 5 + if (String_Equal(semantic, "SV_RenderTargetArrayIndex")) + return "render_target_array_index"; + + // TODO: add + // SV_ViewportArrayIndex + // SV_ClipDistance0..n, SV_CullDistance0..n + } + else if (m_target == HLSLTarget_PixelShader) { + // Not supporting flags, add as bool to options if needed + // if (m_options.flags & MSLGenerator::Flag_NoIndexAttribute) + // { + // // No dual-source blending on iOS, and no index() attribute + // if (String_Equal(semantic, "COLOR0_1")) return NULL; + // } + // else + { + // See these settings + // MTLBlendFactorSource1Color, OneMinusSource1Color, Source1Alpha, OneMinuSource1Alpha. + + // @@ IC: Hardcoded for this specific case, extend ParseSemantic? + if (String_Equal(semantic, "COLOR0_1")) + return "color(0), index(1)"; + } + + // This is only in A14 and higher + if (String_Equal(semantic, "SV_Berycentrics")) + return "barycentric_coord"; + + // Is there an HLSL euivalent. 
Have vulkan ext for PointSize + // "point_coord" + + // "primitive_id" + + if (strncmp(semantic, "SV_Target", length) == 0) { + return m_tree->AddStringFormat("color(%d)", index); + } + // if (strncmp(semantic, "COLOR", length) == 0) + // { + // return m_tree->AddStringFormat("color(%d)", index); + // } + + // depth variants to preserve earlyz, use greater on reverseZ + if (String_Equal(semantic, "SV_Depth")) + return "depth(any)"; + + // These don't quite line up, since comparison is not == + // Metal can only use any/less/greater. Preserve early z when outputting depth. + // reverseZ would use greater. + if (String_Equal(semantic, "SV_DepthGreaterEqual")) + return "depth(greater)"; + if (String_Equal(semantic, "SV_DepthLessEqual")) + return "depth(less)"; + + if (String_Equal(semantic, "SV_Coverage")) + return "sample_mask"; + } + else if (m_target == HLSLTarget_ComputeShader) { + // compute outputs + } + return NULL; +} + +const char* MSLGenerator::GetTypeName(const HLSLType& type, bool exactType) +{ + bool promote = ((type.flags & HLSLTypeFlag_NoPromote) == 0); + + // number + bool isHalfNumerics = promote && !m_options.treatHalfAsFloat; + HLSLBaseType baseType = type.baseType; + + // Note: these conversions should really be done during parsing + // so that casting gets applied. 
+ if (!isHalfNumerics) + baseType = HalfToFloatBaseType(baseType); + + // MSL doesn't support double + if (IsDouble(baseType)) + baseType = DoubleToFloatBaseType(baseType); + + HLSLType remappedType(baseType); + remappedType.typeName = type.typeName; // in case it's a struct + + if (IsSamplerType(baseType) || IsNumericType(baseType) || baseType == HLSLBaseType_Void || baseType == HLSLBaseType_UserDefined) + return GetTypeNameMetal(remappedType); + + // texture + if (IsTextureType(baseType)) { + // unclear if depth supports half, may have to be float always + + bool isHalfTexture = promote && IsHalf(type.formatType) && !m_options.treatHalfAsFloat; + + // MSL docs state must be float type, but what about D16f texture? + if (IsDepthTextureType(baseType)) + isHalfTexture = false; + + // TODO: could use GetTypeNameMetal() but it doesn't include <> portion + // so would have to pool and then return the result. + + // This would allow more formats + // const char* textureTypeName = GetTypeNameMetal(baseType); + // const char* formatTypeName = GetFormatTypeName(baseType, formatType); + // snprintf(buf, sizeof(buf), "%s<%s>", textureTypeName, formatTypeName); + + switch (baseType) { + case HLSLBaseType_Depth2D: + return isHalfTexture ? "depth2d" : "depth2d"; + case HLSLBaseType_Depth2DArray: + return isHalfTexture ? "depth2d_array" : "depth2d_array"; + case HLSLBaseType_DepthCube: + return isHalfTexture ? "depthcube" : "depthcube"; + + /* TODO: also depth_ms_array, but HLSL6.6 equivalent + case HLSLBaseType_Depth2DMS: + return isHalfTexture ? "depth2d_ms" : "depth2d_ms"; + */ + + // More types than just half/float for this + case HLSLBaseType_RWTexture2D: + return isHalfTexture ? "texture2d" : "texture2d"; + + case HLSLBaseType_Texture2D: + return isHalfTexture ? "texture2d" : "texture2d"; + case HLSLBaseType_Texture2DArray: + return isHalfTexture ? "texture2d_array" : "texture2d_array"; + case HLSLBaseType_Texture3D: + return isHalfTexture ? 
"texture3d" : "texture3d"; + case HLSLBaseType_TextureCube: + return isHalfTexture ? "texturecube" : "texturecube"; + case HLSLBaseType_TextureCubeArray: + return isHalfTexture ? "texturecube_array" : "texturecube_array"; + case HLSLBaseType_Texture2DMS: + return isHalfTexture ? "texture2d_ms" : "texture2d_ms"; + + default: + break; + } + } + + Error("Unknown Type"); + return NULL; +} + +} //namespace M4 diff --git a/hlslparser/src/MSLGenerator.h b/hlslparser/src/MSLGenerator.h new file mode 100644 index 00000000..1b69b028 --- /dev/null +++ b/hlslparser/src/MSLGenerator.h @@ -0,0 +1,105 @@ +#pragma once + +#include "CodeWriter.h" +#include "HLSLTree.h" + +namespace M4 { + +class HLSLTree; +struct HLSLFunction; +struct HLSLStruct; + +struct MSLOptions { + int (*attributeCallback)(const char* name, uint32_t index) = NULL; + + // no CLI to set offset + uint32_t bufferRegisterOffset = 0; + + bool writeFileLine = false; + bool treatHalfAsFloat = false; +}; + +/** + * This class is used to generate MSL shaders. + */ +class MSLGenerator { +public: + MSLGenerator(); + + bool Generate(HLSLTree* tree, HLSLTarget target, const char* entryName, const MSLOptions& options = MSLOptions()); + const char* GetResult() const; + +private: + // @@ Rename class argument. Add buffers & textures. + struct ClassArgument { + const char* name; + HLSLType type; + //const char* typeName; // @@ Do we need more than the type name? 
+ const char* registerName; + bool isRef; + + ClassArgument* nextArg; + + ClassArgument(const char* name, HLSLType type, const char* registerName, bool isRef) : name(name), type(type), registerName(registerName), isRef(isRef) + { + nextArg = NULL; + } + }; + + void AddClassArgument(ClassArgument* arg); + + void Prepass(HLSLTree* tree, HLSLTarget target, HLSLFunction* entryFunction); + void CleanPrepass(); + + void PrependDeclarations(); + + void OutputStaticDeclarations(int indent, HLSLStatement* statement); + void OutputStatements(int indent, HLSLStatement* statement); + void OutputAttributes(int indent, HLSLAttribute* attribute); + void OutputDeclaration(HLSLDeclaration* declaration); + void OutputStruct(int indent, HLSLStruct* structure); + void OutputBuffer(int indent, HLSLBuffer* buffer); + void OutputFunction(int indent, HLSLFunction* function); + void OutputExpression(HLSLExpression* expression, HLSLExpression* parentExpression); + void OutputTypedExpression(const HLSLType& type, HLSLExpression* expression, HLSLExpression* parentExpression); + bool NeedsCast(const HLSLType& target, const HLSLType& source); + void OutputCast(const HLSLType& type); + + void OutputArguments(HLSLArgument* argument); + void OutputDeclaration(const HLSLType& type, const char* name, HLSLExpression* assignment, bool isRef = false, bool isConst = false, int alignment = 0); + void OutputDeclarationType(const HLSLType& type, bool isConst = false, bool isRef = false, int alignment = 0, bool isTypeCast = false); + void OutputDeclarationBody(const HLSLType& type, const char* name, HLSLExpression* assignment, bool isRef = false); + void OutputExpressionList(HLSLExpression* expression); + void OutputExpressionList(const HLSLType& type, HLSLExpression* expression); + void OutputExpressionList(HLSLArgument* argument, HLSLExpression* expression); + + void OutputFunctionCallStatement(int indent, HLSLFunctionCall* functionCall, HLSLDeclaration* assingmentExpression); + void 
OutputFunctionCall(HLSLFunctionCall* functionCall, HLSLExpression* parentExpression); + + const char* TranslateInputSemantic(const char* semantic); + const char* TranslateOutputSemantic(const char* semantic); + + const char* GetTypeName(const HLSLType& type, bool exactType); + const char* GetAddressSpaceName(HLSLBaseType baseType, HLSLAddressSpace addressSpace) const; + + bool CanSkipWrittenStatement(const HLSLStatement* statement) const; + + void Error(const char* format, ...) const M4_PRINTF_ATTR(2, 3); + +private: + CodeWriter m_writer; + + HLSLTree* m_tree; + const char* m_entryName; + HLSLTarget m_target; + MSLOptions m_options; + + mutable bool m_error; + + ClassArgument* m_firstClassArgument; + ClassArgument* m_lastClassArgument; + + HLSLFunction* m_currentFunction; +}; + +} //namespace M4 diff --git a/hlslparser/src/Main.cpp b/hlslparser/src/Main.cpp new file mode 100644 index 00000000..b471f26f --- /dev/null +++ b/hlslparser/src/Main.cpp @@ -0,0 +1,339 @@ +#include "HLSLParser.h" + +//#include "GLSLGenerator.h" +#include +#include + +#include + +#include "HLSLGenerator.h" +#include "MSLGenerator.h" + +using namespace std; + +enum Language { + Language_MSL, + Language_HLSL, +}; + +bool ReadFile(const char* fileName, string& str) +{ + struct stat stats = {}; + if (stat(fileName, &stats) < 0) { + return false; + } + size_t size = (int64_t)stats.st_size; + + str.resize(size); + + FILE* fp = fopen(fileName, "r"); + if (fp) { + fread((char*)str.data(), 1, size, fp); + } + fclose(fp); + return true; +} + +void PrintUsage() +{ + fprintf(stderr, + "usage: hlslparser [-h|-g] -i shader.hlsl -o [shader.hlsl | shader.metal]\n" + "Translate DX9-style HLSL shader to HLSL/MSL shader.\n" + " -i input HLSL\n" + " -o output HLSL or MSL\n" + "optional arguments:\n" + " -g debug mode, preserve comments\n" + " -h, --help show this help message and exit\n" + " -line write #file/line directive\n" + " -nohalf turn half into float"); +} + +// Taken from KrmaLog.cpp +static bool 
endsWith(const string& value, const string& ending) +{ + if (ending.size() > value.size()) { + return false; + } + + // reverse comparison at end of value + if (value.size() < ending.size()) + return false; + uint32_t start = (uint32_t)(value.size() - ending.size()); + + for (uint32_t i = 0; i < ending.size(); ++i) { + if (value[start + i] != ending[i]) + return false; + } + + return true; +} + +// Alec, brought over from kram +static string filenameNoExtension(const char* filename) +{ + const char* dotPosStr = strrchr(filename, '.'); + if (dotPosStr == nullptr) + return filename; + auto dotPos = dotPosStr - filename; + + // now chop off the extension + string filenameNoExt = filename; + return filenameNoExt.substr(0, dotPos); +} + +int main(int argc, char* argv[]) +{ + using namespace M4; + + // Parse arguments + string fileName; + const char* entryName = NULL; + + // TODO: could we take modern DX12 HLSL and translate to MSL only + // That would simplify all this. What spirv-cross already does though. + // Could drop HLSLGenerator then, and just use this to gen MSL. + // Much of the glue code can just be in a header, but having it + // in parser, lets this only splice code that is needed. 
+ + Language language = Language_MSL; + HLSLTarget target = HLSLTarget_PixelShader; + string outputFileName; + bool isDebug = false; + bool isTreatHalfAsFloat = false; + bool isWriteFileLine = false; + + for (int argn = 1; argn < argc; ++argn) { + const char* const arg = argv[argn]; + + if (String_Equal(arg, "-h") || String_Equal(arg, "--help")) { + PrintUsage(); + return 0; + } + + else if (String_Equal(arg, "-o") || String_Equal(arg, "-output")) { + if (++argn < argc) + outputFileName = argv[argn]; + } + else if (String_Equal(arg, "-i") || String_Equal(arg, "-input")) { + if (++argn < argc) + fileName = argv[argn]; + } + else if (String_Equal(arg, "-g")) { + // will preserve double-slash comments where possible + isDebug = true; + } + else if (String_Equal(arg, "-nohalf")) { + // will preserve double-slash comments where possible + isTreatHalfAsFloat = true; + } + else if (String_Equal(arg, "-line")) { + // will preserve double-slash comments where possible + isWriteFileLine = true; + } + + // This is derived from end characters of entry point + // else if( String_Equal( arg, "-vs" ) ) + // { + // target = HLSLTarget_VertexShader; + // } + // else if( String_Equal( arg, "-fs" ) ) + // { + // target = HLSLTarget_PixelShader; + // } + // TODO: require a arg to set entryName + // else if( entryName == NULL ) + // { + // entryName = arg; + // } + else { + Log_Error("Too many arguments\n"); + PrintUsage(); + return 1; + } + } + + if (fileName.empty()) { + Log_Error("Missing source filename\n"); + PrintUsage(); + return 1; + } + if (!endsWith(fileName, "hlsl")) { + Log_Error("Input filename must end with .hlsl\n"); + PrintUsage(); + return 1; + } + + if (outputFileName.empty()) { + Log_Error("Missing dest filename\n"); + PrintUsage(); + return 1; + } + if (endsWith(outputFileName, "hlsl")) { + language = Language_HLSL; + } + else if (endsWith(outputFileName, "metal")) { + language = Language_MSL; + } + else { + Log_Error("Output file must end with .hlsl or msls\n"); + 
PrintUsage(); + return 1; + } + + // replace the extension on the output file + outputFileName = filenameNoExtension(outputFileName.c_str()); + + // Allow a mix of shaders in file. + // Code now finds entry points. + // outputFileName += (target == HLSLTarget_PixelShader) ? "PS" : "VS"; + + if (language == Language_MSL) { + outputFileName += ".metal"; + } + else if (language == Language_HLSL) { + outputFileName += ".hlsl"; + } + + // Win build on github is failing on this, so skip for now + // find full pathname of the fileName, so that errors are logged + // in way that can be clicked to. absolute includes .. in it, canonical does not. + std::error_code errorCode; // To shutup exceptions + auto path = filesystem::path(fileName); + fileName = filesystem::canonical(path, errorCode).generic_string(); + + // if this file doesn't exist, then canonical throws exception + path = filesystem::path(outputFileName); + if (filesystem::exists(path)) { + outputFileName = filesystem::canonical(path, errorCode).generic_string(); + + if (outputFileName == fileName) { + Log_Error("Src and Dst filenames match. 
Exiting.\n"); + return 1; + } + } + + //------------------------------------ + // Now start the work + + // Read input file + string source; + if (!ReadFile(fileName.c_str(), source)) { + Log_Error("Input file not found\n"); + return 1; + } + + // Parse input file + Allocator allocator; + HLSLParser parser(&allocator, fileName.c_str(), source.data(), source.size()); + if (isDebug) { + parser.SetKeepComments(true); + } + HLSLTree tree(&allocator); + + // TODO: tie this to CLI, MSL should set both to true + HLSLParserOptions parserOptions; + parserOptions.isHalfst = true; + parserOptions.isHalfio = true; + + if (!parser.Parse(&tree, parserOptions)) { + Log_Error("Parsing failed\n"); + return 1; + } + + int status = 0; + + // build a list of entryPoints + Array entryPoints(&allocator); + if (entryName != nullptr) { + entryPoints.PushBack(entryName); + } + else { + // search all functions with designated endings + HLSLStatement* statement = tree.GetRoot()->statement; + while (statement != NULL) { + if (statement->nodeType == HLSLNodeType_Function) { + HLSLFunction* function = (HLSLFunction*)statement; + const char* name = function->name; + + if (endsWith(name, "VS")) { + entryPoints.PushBack(name); + } + else if (endsWith(name, "PS")) { + entryPoints.PushBack(name); + } + else if (endsWith(name, "CS")) { + entryPoints.PushBack(name); + } + } + + statement = statement->nextStatement; + } + } + + string output; + + for (uint32_t i = 0; i < (uint32_t)entryPoints.GetSize(); ++i) { + const char* entryPoint = entryPoints[i]; + entryName = entryPoint; + if (endsWith(entryPoint, "VS")) + target = HLSLTarget_VertexShader; + else if (endsWith(entryPoint, "PS")) + target = HLSLTarget_PixelShader; + else if (endsWith(entryPoint, "CS")) + target = HLSLTarget_ComputeShader; + + // Generate output + if (language == Language_HLSL) { + HLSLOptions options; + options.writeFileLine = isWriteFileLine; + options.treatHalfAsFloat = isTreatHalfAsFloat; + options.writeVulkan = true; // TODO: 
tie to CLI + + HLSLGenerator generator; + if (generator.Generate(&tree, target, entryName, options)) { + // write the buffer out + output += generator.GetResult(); + } + else { + Log_Error("Translation failed, aborting\n"); + status = 1; + } + } + else if (language == Language_MSL) { + MSLOptions options; + options.writeFileLine = isWriteFileLine; + options.treatHalfAsFloat = isTreatHalfAsFloat; + + MSLGenerator generator; + if (generator.Generate(&tree, target, entryName, options)) { + // write the buffer out + output += generator.GetResult(); + } + else { + Log_Error("Translation failed, aborting\n"); + status = 1; + } + } + + if (status != 0) + break; + } + + if (status == 0) { + // using wb to avoid having Win convert \n to \r\n + FILE* fp = fopen(outputFileName.c_str(), "wb"); + if (!fp) { + Log_Error("Could not open output file %s\n", outputFileName.c_str()); + return 1; + } + + fprintf(fp, "%s", output.c_str()); + fclose(fp); + } + + // It's not enough to return 1 from main, but set exit code. + if (status) + exit(status); + + return status; +} diff --git a/hlslparser/testshaders.xcodeproj/project.pbxproj b/hlslparser/testshaders.xcodeproj/project.pbxproj new file mode 100644 index 00000000..9b3838d6 --- /dev/null +++ b/hlslparser/testshaders.xcodeproj/project.pbxproj @@ -0,0 +1,259 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 56; + objects = { + +/* Begin PBXFileReference section */ + 707D37D129B9798600B08D22 /* out */ = {isa = PBXFileReference; lastKnownFileType = folder; path = out; sourceTree = ""; }; + 707D37D729B979EB00B08D22 /* Skinning.hlsl */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; path = Skinning.hlsl; sourceTree = ""; }; + 707D37DA29B97A0900B08D22 /* buildShaders.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = buildShaders.sh; sourceTree = ""; }; + 70CAA47E29B9BB0E004B7E7B /* Sample.hlsl */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; fileEncoding = 4; path = Sample.hlsl; sourceTree = ""; }; + 70CAA48B29BBD985004B7E7B /* Compute.hlsl */ = {isa = PBXFileReference; explicitFileType = sourcecode.metal; path = Compute.hlsl; sourceTree = ""; }; + 70CAA48F29C63A46004B7E7B /* build.ninja */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = build.ninja; sourceTree = ""; }; + 70CAA49029C8072C004B7E7B /* ShaderHLSL.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ShaderHLSL.h; sourceTree = ""; }; + 70CAA49129C8072C004B7E7B /* ShaderMSL.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ShaderMSL.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXGroup section */ + 707D37A529B9787400B08D22 = { + isa = PBXGroup; + children = ( + 70CAA48F29C63A46004B7E7B /* build.ninja */, + 707D37DA29B97A0900B08D22 /* buildShaders.sh */, + 707D37D829B979EB00B08D22 /* shaders */, + 707D37D129B9798600B08D22 /* out */, + 707D37AF29B9787400B08D22 /* Products */, + ); + sourceTree = ""; + }; + 707D37AF29B9787400B08D22 /* Products */ = { + isa = PBXGroup; + children = ( + ); + name = Products; + sourceTree = ""; + }; + 707D37D829B979EB00B08D22 /* shaders */ = { + isa = PBXGroup; + children = ( + 70CAA49029C8072C004B7E7B /* ShaderHLSL.h */, + 70CAA49129C8072C004B7E7B /* ShaderMSL.h */, + 
70CAA48B29BBD985004B7E7B /* Compute.hlsl */, + 70CAA47E29B9BB0E004B7E7B /* Sample.hlsl */, + 707D37D729B979EB00B08D22 /* Skinning.hlsl */, + ); + path = shaders; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXLegacyTarget section */ + 70CAA48529BAE9F5004B7E7B /* testshaders */ = { + isa = PBXLegacyTarget; + buildArgumentsString = "$(ACTION)"; + buildConfigurationList = 70CAA48629BAE9F5004B7E7B /* Build configuration list for PBXLegacyTarget "testshaders" */; + buildPhases = ( + ); + buildToolPath = /opt/homebrew/bin/ninja; + buildWorkingDirectory = ""; + dependencies = ( + ); + name = testshaders; + passBuildSettingsInEnvironment = 1; + productName = testshaders; + }; +/* End PBXLegacyTarget section */ + +/* Begin PBXProject section */ + 707D37A629B9787400B08D22 /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastUpgradeCheck = 1410; + TargetAttributes = { + 70CAA48529BAE9F5004B7E7B = { + CreatedOnToolsVersion = 14.1; + }; + }; + }; + buildConfigurationList = 707D37A929B9787400B08D22 /* Build configuration list for PBXProject "testshaders" */; + compatibilityVersion = "Xcode 14.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 707D37A529B9787400B08D22; + productRefGroup = 707D37AF29B9787400B08D22 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 70CAA48529BAE9F5004B7E7B /* testshaders */, + ); + }; +/* End PBXProject section */ + +/* Begin XCBuildConfiguration section */ + 707D37C329B9787500B08D22 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + 
CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + }; + name = Debug; + }; + 707D37C429B9787500B08D22 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = 
YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = macosx; + }; + name = Release; + }; + 70CAA48729BAE9F5004B7E7B /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEBUGGING_SYMBOLS = YES; + DEBUG_INFORMATION_FORMAT = dwarf; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OTHER_CFLAGS = ""; + OTHER_LDFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 70CAA48829BAE9F5004B7E7B /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + OTHER_CFLAGS = ""; + OTHER_LDFLAGS = ""; + 
PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 707D37A929B9787400B08D22 /* Build configuration list for PBXProject "testshaders" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 707D37C329B9787500B08D22 /* Debug */, + 707D37C429B9787500B08D22 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 70CAA48629BAE9F5004B7E7B /* Build configuration list for PBXLegacyTarget "testshaders" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 70CAA48729BAE9F5004B7E7B /* Debug */, + 70CAA48829BAE9F5004B7E7B /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 707D37A629B9787400B08D22 /* Project object */; +} diff --git a/kram-preview/KramPreviewViewController.h b/kram-preview/KramPreviewViewController.h index 5acdf430..80df48ea 100644 --- a/kram-preview/KramPreviewViewController.h +++ b/kram-preview/KramPreviewViewController.h @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. diff --git a/kram-preview/KramPreviewViewController.mm b/kram-preview/KramPreviewViewController.mm index bd0f6b7a..c03e7bc8 100644 --- a/kram-preview/KramPreviewViewController.mm +++ b/kram-preview/KramPreviewViewController.mm @@ -1,37 +1,38 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
#import "KramPreviewViewController.h" -#import -#include #import +#include +#import #include "KramLib.h" using namespace kram; // Same code in Preview and Thumbnail -inline NSError* KLOGF(uint32_t code, const char* format, ...) { +inline NSError* KLOGF(uint32_t code, const char* format, ...) +{ string str; - + va_list args; va_start(args, format); /* int32_t len = */ append_vsprintf(str, format, args); va_end(args); - + // log here, so it can see it in Console. But this never appears. // How are you supposed to debug failures? Resorted to passing a unique code into this call. // It wasn't originally supposed to generate an NSError //NSLog(@"%s", str.c_str()); - + // Console prints this as , so what's the point of producing a localizedString ? // This doesn't seem to work to Console app, but maybe if logs are to terminal - // sudo log config --mode "level:debug" --subsystem com.ba.kramv - + // sudo log config --mode "level:debug" --subsystem com.hialec.kramv + NSString* errorText = [NSString stringWithUTF8String:str.c_str()]; - return [NSError errorWithDomain:@"com.ba.kramv" code:code userInfo:@{NSLocalizedDescriptionKey: errorText}]; + return [NSError errorWithDomain:@"com.hialec.kramv" code:code userInfo:@{NSLocalizedDescriptionKey : errorText}]; } @interface KramPreviewViewController () @@ -41,38 +42,45 @@ @implementation KramPreviewViewController { NSImageView* _imageView; } -- (NSString *)nibName { +- (NSString*)nibName +{ return @"KramPreviewViewController"; } -- (void)loadView { +- (void)loadView +{ [super loadView]; // Do any additional setup after loading the view. 
- + _imageView = [[NSImageView alloc] initWithFrame:self.view.frame]; [_imageView setTranslatesAutoresizingMaskIntoConstraints:NO]; //Required to opt-in to autolayout // no frame, already the default // _imageView.imageFrameStyle = NSImageFrameNone; - + _imageView.imageScaling = NSImageScaleProportionallyUpOrDown; - - [self.view addSubview: _imageView]; - - NSDictionary* views = @{@"myview": _imageView}; + + [self.view addSubview:_imageView]; + + NSDictionary* views = @{@"myview" : _imageView}; [self.view addConstraints:[NSLayoutConstraint - constraintsWithVisualFormat:@"H:|[myview]|" options:0 metrics:nil - views:views]]; + constraintsWithVisualFormat:@"H:|[myview]|" + options:0 + metrics:nil + views:views]]; [self.view addConstraints:[NSLayoutConstraint - constraintsWithVisualFormat:@"V:|[myview]|" options:0 metrics:nil - views:views]]; + constraintsWithVisualFormat:@"V:|[myview]|" + options:0 + metrics:nil + views:views]]; //[NSLayoutConstraint activateConstraints: self.view.constraints]; } // This isn't a view, but hoping this is called -- (void)viewDidAppear { +- (void)viewDidAppear +{ [super viewDidAppear]; - + // this must be called after layer is ready //self.view.layer.backgroundColor = [NSColor blackColor].CGColor; _imageView.layer.backgroundColor = [NSColor blackColor].CGColor; @@ -82,9 +90,9 @@ - (void)viewDidAppear { * Implement this method and set QLSupportsSearchableItems to YES in the Info.plist of the extension if you support CoreSpotlight. * - (void)preparePreviewOfSearchableItemWithIdentifier:(NSString *)identifier queryString:(NSString *)queryString completionHandler:(void (^)(NSError * _Nullable))handler { - + // Perform any setup necessary in order to prepare the view. - + // Call the completion handler so Quick Look knows that the preview is fully loaded. // Quick Look will display a loading spinner while the completion handler is not called. 
@@ -92,43 +100,43 @@ - (void)preparePreviewOfSearchableItemWithIdentifier:(NSString *)identifier quer } */ -- (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSError * _Nullable))handler { - +- (void)preparePreviewOfFileAtURL:(NSURL*)url completionHandler:(void (^)(NSError* _Nullable))handler +{ NSError* error = nil; const char* filename = [url fileSystemRepresentation]; -// if (![_imageView isKindOfClass:[NSImageView class]]) { -// error = KLOGF(9, "kramv %s expected NSImageView \n", filename); -// handler(error); -// return; -// } - + // if (![_imageView isKindOfClass:[NSImageView class]]) { + // error = KLOGF(9, "kramv %s expected NSImageView \n", filename); + // handler(error); + // return; + // } + // Add the supported content types to the QLSupportedContentTypes array in the Info.plist of the extension. // Perform any setup necessary in order to prepare the view. - + // The following is adapted out of Thumbnailer - + // No request here, may need to use view size uint32_t maxWidth = _imageView.frame.size.width; uint32_t maxHeight = _imageView.frame.size.height; - + // ignore upper case extensions if (!isSupportedFilename(filename)) { error = KLOGF(1, "kramv %s only supports ktx, ktx2, dds files\n", filename); handler(error); return; } - + KTXImage image; KTXImageData imageData; TexEncoder decoderType = kTexEncoderUnknown; - + if (!imageData.open(filename, image)) { error = KLOGF(2, "kramv %s coould not open file\n", filename); handler(error); return; } - + // This will set decoder auto textureType = MyMTLTextureType2D; // image.textureType if (!validateFormatAndDecoder(textureType, image.pixelFormat, decoderType)) { @@ -136,13 +144,13 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr handler(error); return; } - + bool isPremul = image.isPremul(); bool isSrgb = isSrgbFormat(image.pixelFormat); - + // unpack a level to get the blocks uint32_t mipNumber = 0; - + uint32_t mipCount = image.mipCount(); 
uint32_t w, h, d; for (uint32_t i = 0; i < mipCount; ++i) { @@ -151,25 +159,24 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr mipNumber++; } } - + // clamp to smallest mipNumber = std::min(mipNumber, mipCount - 1); image.mipDimensions(mipNumber, w, h, d); - + uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? uint32_t numChunks = image.totalChunks(); - + vector mipData; // new decode the blocks in that chunk if (isBlockFormat(image.pixelFormat)) { - uint64_t mipLength = image.mipLevels[mipNumber].length; - - // then decode any blocks to rgba8u, not dealing with HDR formats yet + + // then decode any blocks to rgba8u, not dealing with HDR formats yet if (image.isSupercompressed()) { const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; - + mipData.resize(mipLength * numChunks); uint8_t* dstData = mipData.data(); if (!image.unpackLevel(mipNumber, srcData, dstData)) { @@ -177,7 +184,7 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr handler(error); return; } - + // now extract the chunk for the thumbnail out of that level if (numChunks > 1) { macroUnusedVar(chunkNum); @@ -187,66 +194,63 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr mipData.resize(mipLength); } } - else - { + else { // this just truncate to chunk 0 instead of copying chunkNum first mipData.resize(mipLength); - + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; - + memcpy(mipData.data(), srcData, mipLength); } - + KramDecoder decoder; KramDecoderParams params; - + // TODO: should honor swizzle in the ktx image // TODO: probaby need an snorm rgba format to convert the snorm versions, so they're not all red // if sdf, will be signed format and that will stay red - - switch(image.pixelFormat) - { + + switch (image.pixelFormat) { // To avoid showing single channel content in red, replicate to rgb case 
MyMTLPixelFormatBC4_RUnorm: case MyMTLPixelFormatEAC_R11Unorm: params.swizzleText = "rrr1"; break; - + default: break; } - + vector dstMipData; - + // only space for one chunk for now dstMipData.resize(numChunks * h * w * sizeof(Color)); - + // want to just decode one chunk of the level that was unpacked abovve if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { error = KLOGF(6, "kramv %s failed to decode blocks\n", filename); handler(error); return; } - + mipData = dstMipData; } - else if (isExplicitFormat(image.pixelFormat)) - { + else if (isExplicitFormat(image.pixelFormat)) { Image image2D; if (!image2D.loadThumbnailFromKTX(image, mipNumber)) { error = KLOGF(7, "kramv %s failed to convert image to 4 channels\n", filename); handler(error); return; } - + // TODO: could swizzle height (single channel) textures to rrr1 - + // copy from Color back to uint8_t uint32_t mipSize = h * w * sizeof(Color); mipData.resize(mipSize); memcpy(mipData.data(), image2D.pixels().data(), mipSize); } - + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/drawingwithquartz2d/dq_images/dq_images.html#//apple_ref/doc/uid/TP30001066-CH212-TPXREF101 uint32_t rowBytes = w * sizeof(Color); @@ -254,25 +258,25 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr // use vimage in the Accelerate.framework // https://developer.apple.com/library/archive/releasenotes/Performance/RN-vecLib/index.html#//apple_ref/doc/uid/TP40001049 - vImage_Buffer buf = { mipData.data(), h, w, rowBytes }; + vImage_Buffer buf = {mipData.data(), h, w, rowBytes}; // Declare the pixel format for the vImage_Buffer vImage_CGImageFormat format = { - .bitsPerComponent = 8, - .bitsPerPixel = 32, + .bitsPerComponent = 8, + .bitsPerPixel = 32, }; - + format.bitmapInfo = kCGBitmapByteOrderDefault | (CGBitmapInfo)(isPremul ? 
kCGImageAlphaPremultipliedLast : kCGImageAlphaLast); format.colorSpace = isSrgb ? CGColorSpaceCreateWithName(kCGColorSpaceSRGB) : CGColorSpaceCreateDeviceRGB(); - + // don't need to allocate, can requse memory from mip // TODO: might want to convert to PNG, but maybe thumbnail system does that automatically? // see how big thumbs.db is after running this - + // This doesn't allocate, but in an imageView that must outlast the handle call, does that work? bool skipPixelCopy = false; - + vImage_Error err = 0; CGImageRef cgImage = vImageCreateCGImageFromBuffer(&buf, &format, NULL, NULL, skipPixelCopy ? kvImageNoAllocate : kvImageNoFlags, &err); if (err) { @@ -283,30 +287,29 @@ - (void)preparePreviewOfFileAtURL:(NSURL *)url completionHandler:(void (^)(NSErr CGRect rect = CGRectMake(0, 0, w, h); NSImage* nsImage = [[NSImage alloc] initWithCGImage:cgImage size:rect.size]; - + NSImageView* nsImageView = _imageView; // (NSImageView*)self.view; - + // Copositing is like it's using NSCompositeCopy instead of SourceOver // The default is NSCompositeSourceOver. NSRectFill() ignores // -[NSGraphicsContext compositingOperation] and continues to use NSCompositeCopy. // So may have to use NSFillRect which uses SourceOver // https://cocoadev.github.io/NSCompositingOperation/ - + nsImageView.image = nsImage; // This seems to cause plugin to fail with NoAllocate set // This leaks a CGImageRef, but the CGImage doesn't hold any memory w/NoAllocate. if (!skipPixelCopy) CGImageRelease(cgImage); - + // TODO: could add description with info from texture (format, etc) // self.textView.text = ... - + // Call the completion handler so Quick Look knows that the preview is fully loaded. // Quick Look will display a loading spinner while the completion handler is not called. 
- + handler(nil); } @end - diff --git a/kram-preview/kram_preview.entitlements b/kram-preview/kram_preview.entitlements index f2ef3ae0..18aff0ce 100644 --- a/kram-preview/kram_preview.entitlements +++ b/kram-preview/kram_preview.entitlements @@ -2,9 +2,9 @@ - com.apple.security.app-sandbox - - com.apple.security.files.user-selected.read-only - + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + diff --git a/kram-profile/CBA/Analysis.cpp b/kram-profile/CBA/Analysis.cpp new file mode 100755 index 00000000..527cf227 --- /dev/null +++ b/kram-profile/CBA/Analysis.cpp @@ -0,0 +1,628 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense + +//#ifdef _MSC_VER +//struct IUnknown; // workaround for old Win SDK header failures when using /permissive- +//#endif + +// This is for windows.h +//#ifndef NOMINMAX +//#define NOMINMAX +//#endif + +#include "Analysis.h" +#include "Arena.h" +//#include "Colors.h" +#include "Utils.h" +//#include "external/flat_hash_map/bytell_hash_map.hpp" +//#include "external/inih/cpp/INIReader.h" +#include +#include +#include +#include +#include +#include +#include + +// from kram +// returns length of chars appended, -1 if failure +#define STL_NAMESPACE std +using namespace STL_NAMESPACE; +int32_t append_sprintf(string& str, const char* format, ...) 
__printflike(2, 3); + +namespace col +{ + const char* kBold = ""; + const char* kRed = ""; + const char* kGreen = ""; + const char* kYellow = ""; + const char* kBlue = ""; + const char* kMagenta = ""; + const char* kCyan = ""; + const char* kWhite = ""; + const char* kReset = ""; + + /* not using + kBold = "\x1B[1m"; + kRed = "\x1B[91m"; + kGreen = "\x1B[32m"; + kYellow = "\x1B[33m"; + kBlue = "\x1B[34m"; + kMagenta = "\x1B[35m"; + kCyan = "\x1B[36m"; + kWhite = "\x1B[37m"; + kReset = "\x1B[0m"; + */ +} + +struct Config +{ + int fileParseCount = 10; + int fileCodegenCount = 10; + int templateCount = 30; + int functionCount = 30; + int headerCount = 10; + int headerChainCount = 5; + + int minFileTime = 10; + + int maxName = 70; + + bool onlyRootHeaders = true; +}; + +struct pair_hash +{ + template + std::size_t operator () (const std::pair& p) const + { + auto h1 = std::hash{}(p.first); + auto h2 = std::hash{}(p.second); + return h1 + 0x9e3779b9 + (h2<<6) + (h2>>2); + } +}; + + +struct Analysis +{ + Analysis(const BuildEvents& events_, BuildNames& buildNames_, std::string& out_) + : events(events_) + , buildNames(buildNames_) + , out(out_) + { + functions.reserve(256); + instantiations.reserve(256); + parseFiles.reserve(64); + codegenFiles.reserve(64); + headerMap.reserve(256); + } + + const BuildEvents& events; + BuildNames& buildNames; + + std::string& out; + + std::string_view GetBuildName(DetailIndex index) + { + assert(index.idx >= 0); + assert(index.idx < static_cast(buildNames.size())); + + return buildNames[index]; + } + + void ProcessEvent(EventIndex eventIndex); + int largestDetailIndex = 0; + void EndAnalysis(); + + void FindExpensiveHeaders(); + void ReadConfig(); + + DetailIndex FindPath(EventIndex eventIndex) const; + + struct InstantiateEntry + { + int count = 0; + int64_t us = 0; + }; + struct FileEntry + { + DetailIndex file; + int64_t us; + }; + struct IncludeChain + { + std::vector files; + int64_t us = 0; + int count = 0; + }; + struct 
IncludeEntry + { + int64_t us = 0; + int count = 0; + bool root = false; + std::vector includePaths; + }; + + std::unordered_map collapsedNames; + std::string_view GetCollapsedName(DetailIndex idx); + void EmitCollapsedTemplates(); + void EmitCollapsedTemplateOpt(); + void EmitCollapsedInfo( + const std::unordered_map &collapsed, + const char *header_string); + + // key is (name,objfile), value is milliseconds + typedef std::pair IndexPair; + std::unordered_map functions; + std::unordered_map instantiations; + std::vector parseFiles; + std::vector codegenFiles; + int64_t totalParseUs = 0; + int64_t totalCodegenUs = 0; + int totalParseCount = 0; + + std::unordered_map headerMap; + std::vector> expensiveHeaders; + + Config config; +}; + +DetailIndex Analysis::FindPath(EventIndex eventIndex) const +{ + while(eventIndex > EventIndex()) + { + const BuildEvent& ev = events[eventIndex]; + if (ev.type == BuildEventType::kCompiler || ev.type == BuildEventType::kFrontend || ev.type == BuildEventType::kBackend || ev.type == BuildEventType::kOptModule) + if (ev.detailIndex != DetailIndex()) + return ev.detailIndex; + eventIndex = ev.parent; + } + return DetailIndex(); +} + +void Analysis::ProcessEvent(EventIndex eventIndex) +{ + const BuildEvent& event = events[eventIndex]; + largestDetailIndex = (std::max)(largestDetailIndex, event.detailIndex.idx); + + if (event.type == BuildEventType::kOptFunction) + { + auto funKey = std::make_pair(event.detailIndex, FindPath(eventIndex)); + functions[funKey] += event.dur; + } + + if (event.type == BuildEventType::kInstantiateClass || event.type == BuildEventType::kInstantiateFunction) + { + auto& e = instantiations[eventIndex]; + ++e.count; + e.us += event.dur; + } + + if (event.type == BuildEventType::kFrontend) + { + totalParseUs += event.dur; + ++totalParseCount; + if (event.dur >= config.minFileTime * 1000) + { + FileEntry fe; + fe.file = FindPath(eventIndex); + fe.us = event.dur; + parseFiles.emplace_back(fe); + } + } + if 
(event.type == BuildEventType::kBackend) + { + totalCodegenUs += event.dur; + if (event.dur >= config.minFileTime * 1000) + { + FileEntry fe; + fe.file = FindPath(eventIndex); + fe.us = event.dur; + codegenFiles.emplace_back(fe); + } + } + if (event.type == BuildEventType::kParseFile) + { + std::string_view path = GetBuildName(event.detailIndex); + if (utils::IsHeader(path)) + { + IncludeEntry& e = headerMap[path]; + e.us += event.dur; + ++e.count; + + // record chain of ParseFile entries leading up to this one + IncludeChain chain; + chain.us = event.dur; + EventIndex parseIndex = event.parent; + bool hasHeaderBefore = false; + while(parseIndex.idx >= 0) + { + const BuildEvent& ev2 = events[parseIndex]; + if (ev2.type != BuildEventType::kParseFile) + break; + std::string_view ev2path = GetBuildName(ev2.detailIndex); + bool isHeader = utils::IsHeader(ev2path); + if (!isHeader) + break; + chain.files.push_back(ev2.detailIndex); + hasHeaderBefore |= isHeader; + parseIndex = ev2.parent; + } + + e.root |= !hasHeaderBefore; + e.includePaths.push_back(chain); + } + } +} + +static std::string_view CollapseName(const std::string_view& elt) +{ + // Parsing op<, op<<, op>, and op>> seems hard. 
Just skip'm all + if (elt.find("operator") != std::string::npos) + return elt; + + std::string retval; + retval.reserve(elt.size()); + auto b_range = elt.begin(); + auto e_range = elt.begin(); + while (b_range != elt.end()) + { + e_range = std::find(b_range, elt.end(), '<'); + if (e_range == elt.end()) + break; + ++e_range; + retval.append(b_range, e_range); + retval.append("$"); + b_range = e_range; + int open_count = 1; + // find the matching close angle bracket + for (; b_range != elt.end(); ++b_range) + { + if (*b_range == '<') + { + ++open_count; + continue; + } + if (*b_range == '>') + { + if (--open_count == 0) + { + break; + } + continue; + } + } + // b_range is now pointing at a close angle, or it is at the end of the string + } + if (b_range > e_range) + { + // we are in a wacky case where something like op> showed up in a mangled name. + // just bail. + // TODO: this still isn't correct, but it avoids crashes. + return elt; + } + // append the footer + retval.append(b_range, e_range); + + size_t size = retval.size(); + char* ptr = (char*)ArenaAllocate(size+1); + memcpy(ptr, retval.c_str(), size+1); + return std::string_view(ptr, size); +} + +std::string_view Analysis::GetCollapsedName(DetailIndex detail) +{ + std::string_view& name = collapsedNames[detail]; + if (name.empty()) + name = CollapseName(GetBuildName(detail)); + return name; +} + +void Analysis::EmitCollapsedInfo( + const std::unordered_map &collapsed, + const char *header_string) +{ + std::vector> sorted_collapsed; + sorted_collapsed.resize(std::min(config.templateCount, collapsed.size())); + auto cmp = [](const auto &lhs, const auto &rhs) { + return std::tie(lhs.second.us, lhs.second.count, lhs.first) > std::tie(rhs.second.us, rhs.second.count, rhs.first); + }; + std::partial_sort_copy( + collapsed.begin(), collapsed.end(), + sorted_collapsed.begin(), sorted_collapsed.end(), + cmp); + + append_sprintf(out, "%s%s**** %s%s:\n", col::kBold, col::kMagenta, header_string, col::kReset); + for 
(const auto &elt : sorted_collapsed) + { + std::string dname = elt.first; + if (static_cast(dname.size()) > config.maxName) + dname = dname.substr(0, config.maxName - 2) + "..."; + int ms = int(elt.second.us / 1000); + int avg = int(ms / elt.second.count); + append_sprintf(out, "%s%6i%s ms: %s (%i times, avg %i ms)\n", col::kBold, ms, col::kReset, dname.c_str(), elt.second.count, avg); + } + append_sprintf(out, "\n"); +} +void Analysis::EmitCollapsedTemplates() +{ + std::unordered_map collapsed; + for (const auto& inst : instantiations) + { + const std::string_view name = GetCollapsedName(events[inst.first].detailIndex); + auto &stats = collapsed[name]; + + bool recursive = false; + EventIndex p = events[inst.first].parent; + while (p != EventIndex(-1)) + { + auto &event = events[p]; + if (event.type == BuildEventType::kInstantiateClass || event.type == BuildEventType::kInstantiateFunction) + { + const std::string_view ancestor_name = GetCollapsedName(event.detailIndex); + if (ancestor_name == name) + { + recursive = true; + break; + } + } + p = event.parent; + } + if (!recursive) + { + stats.us += inst.second.us; + stats.count += inst.second.count; + } + } + EmitCollapsedInfo(collapsed, "Template sets that took longest to instantiate"); +} + +void Analysis::EmitCollapsedTemplateOpt() +{ + std::unordered_map collapsed; + for (const auto& fn : functions) + { + auto fnNameIndex = fn.first.first; + const std::string_view fnName = GetBuildName(fnNameIndex); + // if we're not related to templates at all, skip + if (fnName.find('<') == std::string::npos) + continue; + + auto &stats = collapsed[GetCollapsedName(fnNameIndex)]; + ++stats.count; + stats.us += fn.second; + } + EmitCollapsedInfo(collapsed, "Function sets that took longest to compile / optimize"); +} + +void Analysis::EndAnalysis() +{ + if (totalParseUs || totalCodegenUs) + { + append_sprintf(out, "%s%s**** Time summary%s:\n", col::kBold, col::kMagenta, col::kReset); + append_sprintf(out, "Compilation (%i 
times):\n", totalParseCount); + append_sprintf(out, " Parsing (frontend): %s%7.1f%s s\n", col::kBold, static_cast(totalParseUs) / 1000000.0, col::kReset); + append_sprintf(out, " Codegen & opts (backend): %s%7.1f%s s\n", col::kBold, static_cast(totalCodegenUs) / 1000000.0, col::kReset); + append_sprintf(out, "\n"); + } + + if (!parseFiles.empty()) + { + std::vector indices; + indices.resize(parseFiles.size()); + for (size_t i = 0; i < indices.size(); ++i) + indices[i] = int(i); + std::sort(indices.begin(), indices.end(), [&](int indexA, int indexB) { + const auto& a = parseFiles[indexA]; + const auto& b = parseFiles[indexB]; + if (a.us != b.us) + return a.us > b.us; + return GetBuildName(a.file) < GetBuildName(b.file); + }); + append_sprintf(out, "%s%s**** Files that took longest to parse (compiler frontend)%s:\n", col::kBold, col::kMagenta, col::kReset); + for (size_t i = 0, n = std::min(config.fileParseCount, indices.size()); i != n; ++i) + { + const auto& e = parseFiles[indices[i]]; + append_sprintf(out, "%s%6i%s ms: %s\n", col::kBold, int(e.us/1000), col::kReset, GetBuildName(e.file).data()); + } + append_sprintf(out, "\n"); + } + if (!codegenFiles.empty()) + { + std::vector indices; + indices.resize(codegenFiles.size()); + for (size_t i = 0; i < indices.size(); ++i) + indices[i] = int(i); + std::sort(indices.begin(), indices.end(), [&](int indexA, int indexB) { + const auto& a = codegenFiles[indexA]; + const auto& b = codegenFiles[indexB]; + if (a.us != b.us) + return a.us > b.us; + return GetBuildName(a.file) < GetBuildName(b.file); + }); + append_sprintf(out, "%s%s**** Files that took longest to codegen (compiler backend)%s:\n", col::kBold, col::kMagenta, col::kReset); + for (size_t i = 0, n = std::min(config.fileCodegenCount, indices.size()); i != n; ++i) + { + const auto& e = codegenFiles[indices[i]]; + append_sprintf(out, "%s%6i%s ms: %s\n", col::kBold, int(e.us/1000), col::kReset, GetBuildName(e.file).data()); + } + append_sprintf(out, "\n"); + } + + if 
(!instantiations.empty()) + { + std::vector> instArray; + instArray.resize(largestDetailIndex+1); + for (const auto& inst : instantiations) //collapse the events + { + DetailIndex d = events[inst.first].detailIndex; + instArray[d.idx].first = d; + instArray[d.idx].second.us += inst.second.us; + instArray[d.idx].second.count += inst.second.count; + } + size_t n = std::min(config.templateCount, instArray.size()); + auto cmp = [&](const auto&a, const auto &b) { + return + std::tie(a.second.us, a.second.count, a.first) > + std::tie(b.second.us, b.second.count, b.first); + }; + std::partial_sort(instArray.begin(), instArray.begin()+n, instArray.end(), cmp); + append_sprintf(out, "%s%s**** Templates that took longest to instantiate%s:\n", col::kBold, col::kMagenta, col::kReset); + for (size_t i = 0; i != n; ++i) + { + const auto& e = instArray[i]; + std::string dname = std::string(GetBuildName(e.first)); + if (static_cast(dname.size()) > config.maxName) + dname = dname.substr(0, config.maxName-2) + "..."; + int ms = int(e.second.us / 1000); + int avg = int(ms / std::max(e.second.count,1)); + append_sprintf(out, "%s%6i%s ms: %s (%i times, avg %i ms)\n", col::kBold, ms, col::kReset, dname.c_str(), e.second.count, avg); + } + append_sprintf(out, "\n"); + + EmitCollapsedTemplates(); + } + + if (!functions.empty()) + { + std::vector> functionsArray; + std::vector indices; + functionsArray.reserve(functions.size()); + indices.reserve(functions.size()); + for (const auto& fn : functions) + { + functionsArray.emplace_back(fn); + indices.emplace_back((int)indices.size()); + } + + std::sort(indices.begin(), indices.end(), [&](int indexA, int indexB) { + const auto& a = functionsArray[indexA]; + const auto& b = functionsArray[indexB]; + if (a.second != b.second) + return a.second > b.second; + return GetBuildName(a.first.first) < GetBuildName(b.first.first); + }); + append_sprintf(out, "%s%s**** Functions that took longest to compile%s:\n", col::kBold, col::kMagenta, col::kReset); 
+ for (size_t i = 0, n = std::min(config.functionCount, indices.size()); i != n; ++i) + { + const auto& e = functionsArray[indices[i]]; + std::string dname = std::string(GetBuildName(e.first.first)); + if (static_cast(dname.size()) > config.maxName) + dname = dname.substr(0, config.maxName-2) + "..."; + int ms = int(e.second / 1000); + append_sprintf(out, "%s%6i%s ms: %s (%s)\n", col::kBold, ms, col::kReset, dname.c_str(), GetBuildName(e.first.second).data()); + } + append_sprintf(out, "\n"); + EmitCollapsedTemplateOpt(); + } + + FindExpensiveHeaders(); + + if (!expensiveHeaders.empty()) + { + append_sprintf(out, "%s%s**** Expensive headers%s:\n", col::kBold, col::kMagenta, col::kReset); + for (const auto& e : expensiveHeaders) + { + const auto& es = headerMap[e.first]; + int ms = int(e.second / 1000); + int avg = ms / es.count; + append_sprintf(out, "%s%i%s ms: %s%s%s (included %i times, avg %i ms), included via:\n", col::kBold, ms, col::kReset, col::kBold, e.first.data(), col::kReset, es.count, avg); + int pathCount = 0; + + // print most costly include chains + // merge identical include chains, recording their (count, totalTimeUs) + std::map, std::pair> mergedIncludeChains; + for (const auto& chain : es.includePaths) + { + auto& dst = mergedIncludeChains[chain.files]; + dst.first++; + dst.second += chain.us; + } + std::vector sortedIncludeChains; + sortedIncludeChains.reserve(mergedIncludeChains.size()); + for (const auto& chain : mergedIncludeChains) + { + IncludeChain dst; + dst.files = chain.first; + dst.count = chain.second.first; + dst.us = chain.second.second; + sortedIncludeChains.emplace_back(dst); + } + std::sort(sortedIncludeChains.begin(), sortedIncludeChains.end(), [](const auto& a, const auto& b) + { + if (a.count != b.count) + return a.count > b.count; + if (a.us != b.us) + return a.us > b.us; + return a.files < b.files; + }); + for (const auto& chain : sortedIncludeChains) + { + append_sprintf(out, " %ix: ", chain.count); + for (auto it = 
chain.files.rbegin(), itEnd = chain.files.rend(); it != itEnd; ++it) + { + append_sprintf(out, "%s ", utils::GetFilename(GetBuildName(*it)).data()); + } + if (chain.files.empty()) + append_sprintf(out, ""); + append_sprintf(out, "\n"); + ++pathCount; + if (pathCount > config.headerChainCount) + break; + } + if (pathCount > config.headerChainCount) + { + append_sprintf(out, " ...\n"); + } + + append_sprintf(out, "\n"); + } + } +} + +void Analysis::FindExpensiveHeaders() +{ + expensiveHeaders.reserve(headerMap.size()); + for (const auto& kvp : headerMap) + { + if (config.onlyRootHeaders && !kvp.second.root) + continue; + expensiveHeaders.push_back(std::make_pair(kvp.first, kvp.second.us)); + } + std::sort(expensiveHeaders.begin(), expensiveHeaders.end(), [&](const auto& a, const auto& b) + { + if (a.second != b.second) + return a.second > b.second; + return a.first < b.first; + }); + if (static_cast(expensiveHeaders.size()) > config.headerCount) + expensiveHeaders.resize(config.headerCount); +} + +void Analysis::ReadConfig() +{ +// No longer reading ini file +// INIReader ini("ClangBuildAnalyzer.ini"); +// +// config.fileParseCount = (int)ini.GetInteger("counts", "fileParse", config.fileParseCount); +// config.fileCodegenCount = (int)ini.GetInteger("counts", "fileCodegen", config.fileCodegenCount); +// config.functionCount = (int)ini.GetInteger("counts", "function", config.functionCount); +// config.templateCount = (int)ini.GetInteger("counts", "template", config.templateCount); +// config.headerCount = (int)ini.GetInteger("counts", "header", config.headerCount); +// config.headerChainCount = (int)ini.GetInteger("counts", "headerChain", config.headerChainCount); +// +// config.minFileTime = (int)ini.GetInteger("minTimes", "file", config.minFileTime); +// +// config.maxName = (int)ini.GetInteger("misc", "maxNameLength", config.maxName); +// config.onlyRootHeaders = ini.GetBoolean("misc", "onlyRootHeaders",config.onlyRootHeaders); +} + + +void DoAnalysis(const 
BuildEvents& events, BuildNames& names, std::string& out) +{ + Analysis a(events, names, out); + a.ReadConfig(); + for (int i = 0, n = (int)events.size(); i != n; ++i) + a.ProcessEvent(EventIndex(i)); + a.EndAnalysis(); +} diff --git a/kram-profile/CBA/Analysis.h b/kram-profile/CBA/Analysis.h new file mode 100755 index 00000000..83f93f35 --- /dev/null +++ b/kram-profile/CBA/Analysis.h @@ -0,0 +1,9 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#pragma once + +#include + +#include "BuildEvents.h" + +void DoAnalysis(const BuildEvents& events, BuildNames& names, std::string& out); diff --git a/kram-profile/CBA/Arena.cpp b/kram-profile/CBA/Arena.cpp new file mode 100755 index 00000000..c5d69e75 --- /dev/null +++ b/kram-profile/CBA/Arena.cpp @@ -0,0 +1,49 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense + +#include +#include +#include + +struct ArenaBlock +{ + uint8_t* buffer; + size_t bufferSize; + size_t used; +}; + +static std::vector s_Blocks; + +const size_t kDefaultBlockSize = 65536; + + +void ArenaInitialize() +{ +} + +void ArenaDelete() +{ + for (auto& b : s_Blocks) + delete[] b.buffer; + s_Blocks.clear(); +} + +void* ArenaAllocate(size_t size) +{ + // do we need a new block? 
+ if (s_Blocks.empty() || s_Blocks.back().used + size > s_Blocks.back().bufferSize) + { + ArenaBlock block; + block.bufferSize = std::max(size, kDefaultBlockSize); + block.buffer = new uint8_t[block.bufferSize]; + block.used = 0; + s_Blocks.emplace_back(block); + } + + // allocate from the last block + ArenaBlock& b = s_Blocks.back(); + void* ptr = b.buffer + b.used; + b.used += size; + return ptr; +} + diff --git a/kram-profile/CBA/Arena.h b/kram-profile/CBA/Arena.h new file mode 100755 index 00000000..0656d73b --- /dev/null +++ b/kram-profile/CBA/Arena.h @@ -0,0 +1,7 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#pragma once + +void ArenaInitialize(); +void ArenaDelete(); +void* ArenaAllocate(size_t size); diff --git a/kram-profile/CBA/BuildEvents.cpp b/kram-profile/CBA/BuildEvents.cpp new file mode 100755 index 00000000..36a9dec7 --- /dev/null +++ b/kram-profile/CBA/BuildEvents.cpp @@ -0,0 +1,680 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#include "BuildEvents.h" + +// This is for windows.h +//#ifndef NOMINMAX +//#define NOMINMAX +//#endif + +#include "Arena.h" +//#include "Colors.h" +#include "Utils.h" +//#include "external/cute_files.h" +//#include "external/flat_hash_map/bytell_hash_map.hpp" +//#include "external/llvm-Demangle/include/Demangle.h" +#include "simdjson.h" +//#include "external/xxHash/xxhash.h" +#include +#include +#include +#include + +// Taken out of kram +// case-sensitive fnv1a hash, can pass existing hash to continue a hash +inline uint32_t HashFnv1a(const char* val, uint32_t hash = 0x811c9dc5) { + const uint32_t prime = 0x01000193; // 16777619 (32-bit) + while (*val) { + hash = (hash * prime) ^ (uint32_t)*val++; + } + return hash; +} + +extern "C" const char* _Nullable demangleSymbolName(const char* _Nonnull symbolName_); + +struct HashedString +{ + explicit HashedString(const char* s) + { + len = 
strlen(s); + hash = HashFnv1a(s); // , len, 0); + str = s; + } + size_t hash; + size_t len; + const char* str; +}; +namespace std +{ + template<> struct hash + { + size_t operator()(const HashedString& v) const + { + return v.hash; + } + }; + template<> struct equal_to + { + bool operator()(const HashedString& a, const HashedString& b) const + { + return a.hash == b.hash && a.len == b.len && memcmp(a.str, b.str, a.len) == 0; + } + }; +} // namespace std + +typedef std::unordered_map NameToIndexMap; + +/* +static void DebugPrintEvents(const BuildEvents& events, const BuildNames& names) +{ + for (size_t i = 0; i < events.size(); ++i) + { + const BuildEvent& event = events[EventIndex(int(i))]; + const std::string_view namesSubstr = names[event.detailIndex].substr(0, 130); + printf("%4zi: t=%i t1=%7lld t2=%7lld par=%4i ch=%4zi det=%.*s\n", i, (int) event.type, event.ts, event.ts+event.dur, event.parent.idx, event.children.size(), (int)namesSubstr.size(), namesSubstr.data()); + } +} +*/ + +static void FindParentChildrenIndices(BuildEvents& events) +{ + if (events.empty()) + return; + + // sort events by start time so that parent events go before child events + std::vector sortedIndices; + sortedIndices.resize(events.size()); + for (int i = 0, n = (int)events.size(); i != n; ++i) + sortedIndices[i] = EventIndex(i); + std::sort(sortedIndices.begin(), sortedIndices.end(), [&](EventIndex ia, EventIndex ib){ + const auto& ea = events[ia]; + const auto& eb = events[ib]; + if (ea.ts != eb.ts) + return ea.ts < eb.ts; + // break start time ties by making longer events go first (they must be parent) + if (ea.dur != eb.dur) + return ea.dur > eb.dur; + // break ties by assuming that later events in sequence must start parent + return ia > ib; + }); + + // figure out the event hierarchy; for now the parent/child indices are into + // the "sortedIndices" array and not event indices in the "events" array. 
+ // As a result, we will be digging into .idx members a lot, as we are temporarily + // putting the wrong kind of index into 'parent'. + int root = 0; + BuildEvent* evRoot = &events[sortedIndices[root]]; + evRoot->parent.idx = -1; + for (int i = 1, n = (int)events.size(); i != n; ++i) + { + BuildEvent* ev2 = &events[sortedIndices[i]]; + while (root != -1) + { + // add slice if within bounds + if (ev2->ts >= evRoot->ts && ev2->ts+ev2->dur <= evRoot->ts+evRoot->dur) + { + ev2->parent.idx = root; + evRoot->children.push_back(EventIndex(i)); + break; + } + + root = evRoot->parent.idx; + if (root != -1) + evRoot = &events[sortedIndices[root]]; + } + if (root == -1) + { + ev2->parent.idx = -1; + } + root = i; + evRoot = &events[sortedIndices[i]]; + } + + // fixup event parent/child indices to be into "events" array + for (auto& e : events) + { + for (auto& c : e.children) + c = sortedIndices[c.idx]; + if (e.parent.idx != -1) + e.parent = sortedIndices[e.parent.idx]; + } + +#ifndef NDEBUG + for (int i = 0, n = (int)events.size(); i != n; ++i) + { + assert(i != events[EventIndex(i)].parent.idx); + } +#endif +} + +struct BuildEventsParser +{ + BuildEventsParser() + { + // make sure zero index is empty + NameToIndex("", resultNameToIndex); + resultNames.push_back(std::string_view(resultNameToIndex.begin()->first.str, 0)); + + resultEvents.reserve(2048); + resultNames.reserve(2048); + } + + BuildEvents resultEvents; + BuildNames resultNames; + NameToIndexMap resultNameToIndex; + std::mutex resultMutex; + std::mutex arenaMutex; + + void AddEvents(BuildEvents& add, const NameToIndexMap& nameToIndex) + { + // we got job-local build events and name-to-index mapping; + // add them to the global result with any necessary remapping. + // gotta take a mutex since we're modifying shared state here. 
+ std::scoped_lock lock(resultMutex); + + // move events to end of result events list + int offset = (int)resultEvents.size(); + std::move(add.begin(), add.end(), std::back_inserter(resultEvents)); + add.clear(); + + // create remapping from name indices, adding them to global remapping + // list if necessary. + std::unordered_map detailRemap; + for (const auto& kvp : nameToIndex) + { + const auto& existing = resultNameToIndex.find(kvp.first); + if (existing == resultNameToIndex.end()) + { + DetailIndex index((int)resultNameToIndex.size()); + resultNameToIndex.insert(std::make_pair(kvp.first, index)); + resultNames.push_back(std::string_view(kvp.first.str, kvp.first.len)); + detailRemap[kvp.second] = index; + } + else + { + detailRemap[kvp.second] = existing->second; + } + } + + // adjust the added event indices + for (size_t i = offset, n = resultEvents.size(); i != n; ++i) + { + BuildEvent& ev = resultEvents[EventIndex(int(i))]; + if (ev.parent.idx >= 0) + ev.parent.idx += offset; + for (auto& ch : ev.children) + ch.idx += offset; + if (ev.detailIndex.idx != 0) + { + assert(ev.detailIndex.idx >= 0); + assert(ev.detailIndex.idx < static_cast(nameToIndex.size())); + ev.detailIndex = detailRemap[ev.detailIndex]; + assert(ev.detailIndex.idx >= 0); + assert(ev.detailIndex.idx < static_cast(resultNameToIndex.size())); + } + } + + assert(resultNameToIndex.size() == resultNames.size()); + } + + + DetailIndex NameToIndex(const char* str, NameToIndexMap& nameToIndex) + { + HashedString hashedName(str); + auto it = nameToIndex.find(hashedName); + if (it != nameToIndex.end()) + return it->second; + + char* strCopy; + { + // arena allocator is not thread safe, take a mutex + std::scoped_lock lock(arenaMutex); + strCopy = (char*)ArenaAllocate(hashedName.len+1); + } + memcpy(strCopy, str, hashedName.len+1); + hashedName.str = strCopy; + + DetailIndex index((int)nameToIndex.size()); + nameToIndex.insert(std::make_pair(hashedName, index)); + return index; + } + + bool 
ParseRoot(simdjson::dom::element& it, const std::string& curFileName) + { + simdjson::dom::element nit; + if (it["traceEvents"].get(nit)) + return false; + return ParseTraceEvents(nit, curFileName); + } + + bool ParseTraceEvents(simdjson::dom::element& it, const std::string& curFileName) + { + if (!it.is_array()) + return false; + + NameToIndexMap nameToIndexLocal; + NameToIndex("", nameToIndexLocal); // make sure zero index is empty + BuildEvents fileEvents; + fileEvents.reserve(256); + for (simdjson::dom::element nit : it) + { + ParseEvent(nit, curFileName, fileEvents, nameToIndexLocal); + } + if (fileEvents.empty()) + return false; + + FindParentChildrenIndices(fileEvents); + if (fileEvents.back().parent.idx != -1) + { + //printf("%sWARN: the last trace event should be root; was not in '%s'.%s\n", col::kRed, curFileName.c_str(), col::kReset); + return false; + } + AddEvents(fileEvents, nameToIndexLocal); + return true; + } + + static bool StrEqual(std::string_view a, const char* b) + { + return a == b; + } + + static bool StartsWith(std::string_view a, const char* b, int blen) + { + return static_cast(a.size()) >= blen && a.compare(0, blen, b) == 0; + } + + const char* kPid = "pid"; + const char* kTid = "tid"; + const char* kPh = "ph"; + const char* kName = "name"; + const char* kTs = "ts"; + const char* kDur = "dur"; + const char* kArgs = "args"; + const char* kDetail = "detail"; + + void ParseEvent(simdjson::dom::element& it, const std::string& curFileName, BuildEvents& fileEvents, NameToIndexMap& nameToIndexLocal) + { + simdjson::dom::object node; + if (it.get(node)) + { + //printf("%sERROR: 'traceEvents' elements in JSON should be objects.%s\n", col::kRed, col::kReset); + resultEvents.clear(); + return; + } + + BuildEvent event; + bool valid = true; + std::string_view detailPtr; + for (simdjson::dom::key_value_pair kv : node) + { + std::string_view nodeKey = kv.key; + if (StrEqual(nodeKey, kPid)) + { + if (!kv.value.is_int64()) // starting with Clang/LLVM 11 
process IDs are not necessarily 1 + valid = false; + } + else if (StrEqual(nodeKey, kTid)) + { + if (!kv.value.is_int64()) // starting with Clang/LLVM 11 thread IDs are not necessarily 0 + valid = false; + } + else if (StrEqual(nodeKey, kPh)) + { + if (!kv.value.is_string() || !StrEqual(kv.value.get_string(), "X")) + valid = false; + } + else if (StrEqual(nodeKey, kName) && kv.value.is_string() && valid) + { + std::string_view name = kv.value.get_string(); + if (StrEqual(name, "ExecuteCompiler")) + event.type = BuildEventType::kCompiler; + else if (StrEqual(name, "Frontend")) + event.type = BuildEventType::kFrontend; + else if (StrEqual(name, "Backend")) + event.type = BuildEventType::kBackend; + else if (StrEqual(name, "Source")) + event.type = BuildEventType::kParseFile; + else if (StrEqual(name, "ParseTemplate")) + event.type = BuildEventType::kParseTemplate; + else if (StrEqual(name, "ParseClass")) + event.type = BuildEventType::kParseClass; + else if (StrEqual(name, "InstantiateClass")) + event.type = BuildEventType::kInstantiateClass; + else if (StrEqual(name, "InstantiateFunction")) + event.type = BuildEventType::kInstantiateFunction; + else if (StrEqual(name, "OptModule")) + event.type = BuildEventType::kOptModule; + else if (StrEqual(name, "OptFunction")) + event.type = BuildEventType::kOptFunction; + } + else if (StrEqual(nodeKey, kTs)) + { + if (kv.value.is_int64()) + event.ts = kv.value.get_int64(); + else + valid = false; + } + else if (StrEqual(nodeKey, kDur)) + { + if (kv.value.is_int64()) + event.dur = kv.value.get_int64(); + else + valid = false; + } + else if (StrEqual(nodeKey, kArgs)) + { + if (kv.value.is_object()) + { + simdjson::dom::object kvo(kv.value); + simdjson::dom::key_value_pair args = *kvo.begin(); + if (args.value.is_string()) + detailPtr = args.value.get_string(); + } + } + }; + + if (event.type== BuildEventType::kUnknown || !valid) + return; + + // if the "compiler" event has no detail name, use the current json file name + if 
(detailPtr.empty() && event.type == BuildEventType::kCompiler) + detailPtr = curFileName; + if (!detailPtr.empty()) + { + std::string detailString; + if (event.type == BuildEventType::kParseFile || event.type == BuildEventType::kOptModule) + { + // do various cleanups/nice-ifications of the detail name: + // make paths shorter (i.e. relative to project) where possible + detailString = utils::GetNicePath(detailPtr); + + // switch json to .o or .obj (or .cpp) + if (utils::EndsWith(detailString, ".json")) + { + detailString = std::string(detailString.substr(0, detailString.length()-4)) + "o"; + } + } + else + { + detailString = detailPtr; + + // Use the kram demangle + // clang needs to fix this, since Win clang symbols don't demangle using macOS demangle + if (event.type == BuildEventType::kOptFunction) + { + const char* demangledName = demangleSymbolName(detailString.c_str()); + if (demangledName != nullptr) + detailString = demangledName; + } + } + + + /* don't do this + // don't report the clang trace .json file, instead get the object file at the same location if it's there + if (utils::EndsWith(detailString, ".json")) + { + std::string candidate = std::string(detailString.substr(0, detailString.length()-4)) + "o"; + // check for .o + if (cf_file_exists(candidate.c_str())) + detailString = candidate; + else + { + // check for .obj + candidate += "bj"; + if (cf_file_exists(candidate.c_str())) + detailString = candidate; + } + } + + // TODO: may need to demangle again + // demangle possibly mangled names + if (event.type == BuildEventType::kOptFunction) + detailString = llvm::demangle(detailString); + */ + + event.detailIndex = NameToIndex(detailString.c_str(), nameToIndexLocal); + } + + fileEvents.emplace_back(event); + } +}; + +BuildEventsParser* CreateBuildEventsParser() +{ + BuildEventsParser* p = new BuildEventsParser(); + return p; +} +void DeleteBuildEventsParser(BuildEventsParser* parser) +{ + delete parser; +} + +BuildEvents& 
GetBuildEvents(BuildEventsParser& parser) +{ + return parser.resultEvents; +} +BuildNames& GetBuildNames(BuildEventsParser& parser) +{ + return parser.resultNames; +} + +bool ParseBuildEvents(BuildEventsParser* parser, const uint8_t* buf, size_t bufSize, const std::string& fileName) +{ + using namespace simdjson; + dom::parser p; + dom::element doc; + auto error = p.parse(buf, bufSize).get(doc); + if (error) + { +// printf("%sWARN: JSON parse error in %s: %s.%s\n", col::kYellow, fileName.c_str(), error_message(error), col::kReset); + return false; + } + + return parser->ParseRoot(doc, fileName); + //DebugPrintEvents(outEvents, outNames); +} + +/* +struct BufferedWriter +{ + BufferedWriter(FILE* f) + : size(0) + , file(f) + { + hasher = XXH64_createState(); + XXH64_reset(hasher, 0); + } + ~BufferedWriter() + { + Flush(); + XXH64_hash_t hash = XXH64_digest(hasher); + fwrite(&hash, sizeof(hash), 1, file); + fclose(file); + XXH64_freeState(hasher); + } + + template void Write(const T& t) + { + Write(&t, sizeof(t)); + } + void Write(const void* ptr, size_t sz) + { + if (sz == 0) return; + if (sz >= kBufferSize) + { + if( size > 0 ) + { + Flush(); + } + + XXH64_update(hasher, ptr, sz); + fwrite(ptr, sz, 1, file); + return; + } + if (sz + size > kBufferSize) + Flush(); + memcpy(&buffer[size], ptr, sz); + size += sz; + } + + + void Flush() + { + fwrite(buffer, size, 1, file); + XXH64_update(hasher, buffer, size); + size = 0; + } + + enum { kBufferSize = 65536 }; + uint8_t buffer[kBufferSize]; + size_t size; + FILE* file; + XXH64_state_t* hasher; +}; + +struct BufferedReader +{ + BufferedReader(FILE* f) + : pos(0) + { + fseek(f, 0, SEEK_END); + size_t fsize = ftello64(f); + fseek(f, 0, SEEK_SET); + buffer = new uint8_t[fsize]; + bufferSize = fsize; + fread(buffer, bufferSize, 1, f); + fclose(f); + } + ~BufferedReader() + { + delete[] buffer; + } + + template void Read(T& t) + { + Read(&t, sizeof(t)); + } + void Read(void* ptr, size_t sz) + { + if (pos + sz > bufferSize) + { 
+ memset(ptr, 0, sz); + return; + } + memcpy(ptr, &buffer[pos], sz); + pos += sz; + } + + uint8_t* buffer; + size_t pos; + size_t bufferSize; +}; + +const uint32_t kFileMagic = 'CBA0'; + +bool SaveBuildEvents(BuildEventsParser* parser, const std::string& fileName) +{ + FILE* f = fopen(fileName.c_str(), "wb"); + if (f == nullptr) + { + printf("%sERROR: failed to save to file '%s'%s\n", col::kRed, fileName.c_str(), col::kReset); + return false; + } + + BufferedWriter w(f); + + w.Write(kFileMagic); + int64_t eventsCount = parser->resultEvents.size(); + w.Write(eventsCount); + for(const auto& e : parser->resultEvents) + { + int32_t eType = (int32_t)e.type; + w.Write(eType); + w.Write(e.ts); + w.Write(e.dur); + w.Write(e.detailIndex.idx); + w.Write(e.parent.idx); + int64_t childCount = e.children.size(); + w.Write(childCount); + w.Write(e.children.data(), childCount * sizeof(e.children[0])); + } + + int64_t namesCount = parser->resultNames.size(); + w.Write(namesCount); + for(const auto& n : parser->resultNames) + { + uint32_t nSize = (uint32_t)n.size(); + w.Write(nSize); + w.Write(n.data(), nSize); + } + + return true; +} + +bool LoadBuildEvents(const std::string& fileName, BuildEvents& outEvents, BuildNames& outNames) +{ + FILE* f = fopen(fileName.c_str(), "rb"); + if (f == nullptr) + { + printf("%sERROR: failed to open file '%s'%s\n", col::kRed, fileName.c_str(), col::kReset); + return false; + } + + BufferedReader r(f); + if (r.bufferSize < 12) // 4 bytes magic header, 8 bytes hash at end + { + printf("%sERROR: corrupt input file '%s' (size too small)%s\n", col::kRed, fileName.c_str(), col::kReset); + return false; + } + // check header magic + int32_t magic = 0; + r.Read(magic); + if (magic != kFileMagic) + { + printf("%sERROR: unknown format of input file '%s'%s\n", col::kRed, fileName.c_str(), col::kReset); + return false; + } + // chech hash checksum + XXH64_hash_t hash = XXH64(r.buffer, r.bufferSize-sizeof(XXH64_hash_t), 0); + if (memcmp(&hash, 
r.buffer+r.bufferSize-sizeof(XXH64_hash_t), sizeof(XXH64_hash_t)) != 0) + { + printf("%sERROR: corrupt input file '%s' (checksum mismatch)%s\n", col::kRed, fileName.c_str(), col::kReset); + return false; + } + + int64_t eventsCount = 0; + r.Read(eventsCount); + outEvents.resize(eventsCount); + for(auto& e : outEvents) + { + int32_t eType; + r.Read(eType); + e.type = (BuildEventType)eType; + r.Read(e.ts); + r.Read(e.dur); + r.Read(e.detailIndex.idx); + r.Read(e.parent.idx); + int64_t childCount = 0; + r.Read(childCount); + e.children.resize(childCount); + if (childCount != 0) + r.Read(&e.children[0], childCount * sizeof(e.children[0])); + } + + int64_t namesCount = 0; + r.Read(namesCount); + outNames.resize(namesCount); + for(auto& n : outNames) + { + uint32_t nSize = 0; + r.Read(nSize); + char* ptr = (char*)ArenaAllocate(nSize+1); + memset(ptr, 0, nSize+1); + n = std::string_view(ptr, nSize); + if (nSize != 0) + r.Read(ptr, nSize); + } + + return true; +} +*/ diff --git a/kram-profile/CBA/BuildEvents.h b/kram-profile/CBA/BuildEvents.h new file mode 100755 index 00000000..79e15258 --- /dev/null +++ b/kram-profile/CBA/BuildEvents.h @@ -0,0 +1,121 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#pragma once +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include +#include + + +//#ifdef _MSC_VER +//#define ftello64 _ftelli64 +//#elif defined(__APPLE__) +//#define ftello64 ftello +//#endif + + +enum class BuildEventType +{ + kUnknown, + kCompiler, + kFrontend, + kBackend, + kParseFile, + kParseTemplate, + kParseClass, + kInstantiateClass, + kInstantiateFunction, + kOptModule, + kOptFunction, +}; + +struct DetailIndex +{ + int idx; + explicit DetailIndex(int d = 0) : idx(d) {} + +#if __cplusplus >= 202002L + // C++20 can autogen most of these from like <=> operator + auto operator<=>(const DetailIndex& rhs) const = default; +#else + bool operator==(DetailIndex rhs) const { return idx == 
rhs.idx; } + bool operator!=(DetailIndex rhs) const { return idx != rhs.idx; } + bool operator<(DetailIndex rhs) const { return idx < rhs.idx; } + bool operator>(DetailIndex rhs) const { return idx > rhs.idx; } + bool operator<=(DetailIndex rhs) const { return idx <= rhs.idx; } + bool operator>=(DetailIndex rhs) const { return idx >= rhs.idx; } +#endif +}; + +struct EventIndex +{ + int idx; + explicit EventIndex(int e = -1) : idx(e) {} + +#if __cplusplus >= 202002L + // C++20 can autogen most of these from like <=> operator + auto operator<=>(const EventIndex& rhs) const = default; +#else + bool operator==(EventIndex rhs) const { return idx == rhs.idx; } + bool operator!=(EventIndex rhs) const { return idx != rhs.idx; } + bool operator<(EventIndex rhs) const { return idx < rhs.idx; } + bool operator>(EventIndex rhs) const { return idx > rhs.idx; } + bool operator<=(EventIndex rhs) const { return idx <= rhs.idx; } + bool operator>=(EventIndex rhs) const { return idx >= rhs.idx; } +#endif +}; + +namespace std +{ + template <> struct hash + { + size_t operator()(DetailIndex x) const + { + return hash()(x.idx); + } + }; + template <> struct hash + { + size_t operator()(EventIndex x) const + { + return hash()(x.idx); + } + }; +} + +struct BuildEvent +{ + BuildEventType type = BuildEventType::kUnknown; + int64_t ts = 0; + int64_t dur = 0; + DetailIndex detailIndex; + EventIndex parent{ -1 }; + std::vector children; +}; + +template +struct IndexedVector : std::vector +{ + using std::vector::vector; + typename std::vector::reference operator[](Idx pos) { return this->begin()[pos.idx]; } + typename std::vector::const_reference operator[](Idx pos) const { return this->begin()[pos.idx]; } +}; +typedef IndexedVector BuildNames; +typedef IndexedVector BuildEvents; + +struct BuildEventsParser; +BuildEventsParser* CreateBuildEventsParser(); +void DeleteBuildEventsParser(BuildEventsParser* parser); + +// NOTE: can be called in parallel +bool ParseBuildEvents(BuildEventsParser* 
parser, const uint8_t* buf, size_t bufSize, const std::string& fileName); + +//bool SaveBuildEvents(BuildEventsParser* parser, const std::string& fileName); +// +//bool LoadBuildEvents(const std::string& fileName, BuildEvents& outEvents, BuildNames& outNames); + +BuildEvents& GetBuildEvents(BuildEventsParser& parser); +BuildNames& GetBuildNames(BuildEventsParser& parser); + diff --git a/kram-profile/CBA/CBA.h b/kram-profile/CBA/CBA.h new file mode 100644 index 00000000..d8d48bcb --- /dev/null +++ b/kram-profile/CBA/CBA.h @@ -0,0 +1,16 @@ +#import "Foundation/Foundation.h" + +@interface CBA : NSObject + +- (_Nonnull instancetype)init; +- (void)deinit; + +// Can parseAll or one file at time +- (void)parse:(NSData* _Nonnull)file filename:(NSString* _Nonnull)filename; +- (void)parseAll:(NSArray * _Nonnull)files filenames:(NSArray * _Nonnull)filenames; + +// This isn't so useful, since need specific files to parse +- (NSString* _Nonnull)analyzeAll; +- (NSString* _Nonnull)analyze:(NSArray * _Nonnull)filenames; + +@end diff --git a/kram-profile/CBA/CBA.mm b/kram-profile/CBA/CBA.mm new file mode 100644 index 00000000..f5f48cef --- /dev/null +++ b/kram-profile/CBA/CBA.mm @@ -0,0 +1,82 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense + +#import "CBA.h" + +// This is for windows.h +//#define _CRT_SECURE_NO_WARNINGS +//#define NOMINMAX + +#include "Analysis.h" +#include "Arena.h" +#include "BuildEvents.h" +//#include "Colors.h" +#include "Utils.h" + +#include +#include +#include +#include +#include +#include + +//#ifdef _MSC_VER +//struct IUnknown; // workaround for old Win SDK header failures when using /permissive- +//#endif + + +@implementation CBA { + BuildEventsParser* parser; +} + +- (_Nonnull instancetype)init { + ArenaInitialize(); + + parser = CreateBuildEventsParser(); + + return self; +} + +- (void)deinit { + // Shutdown the parser + DeleteBuildEventsParser(parser); + parser = nullptr; + + ArenaDelete(); 
+} + +// This is bad because it runs single-threaded, and doesn't cache anything across builds. +// TODO: restructure, so parser is built once +// feed files to it individually, and then request analysis on a few of the events/names +// TODO: reformat output to Perfetto json, can then display it visually. +- (void)parseAll:(NSArray * _Nonnull)files filenames:(NSArray * _Nonnull)filenames +{ + for (uint32_t i = 0; i < files.count; ++i) { + [self parse:files[i] filename:filenames[i]]; + } +} + +- (void)parse:(NSData* _Nonnull)file filename:(NSString* _Nonnull)filename { + const char* filename_ = [filename UTF8String]; + ParseBuildEvents(parser, (const uint8_t*)file.bytes, file.length, filename_); +} + + +- (NSString* _Nonnull)analyzeAll { + // Run the analysis on data from the parser. + std::string out; + DoAnalysis(GetBuildEvents(*parser), GetBuildNames(*parser), out); + + return [NSString stringWithUTF8String:out.c_str()]; +} + +- (NSString* _Nonnull)analyze:(NSArray * _Nonnull)filenames { + // Run the analysis on data from the parser. + std::string out; + DoAnalysis(GetBuildEvents(*parser), GetBuildNames(*parser), out); + + return [NSString stringWithUTF8String:out.c_str()]; +} + +@end + diff --git a/kram-profile/CBA/Utils.cpp b/kram-profile/CBA/Utils.cpp new file mode 100755 index 00000000..851a0e21 --- /dev/null +++ b/kram-profile/CBA/Utils.cpp @@ -0,0 +1,93 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#include "Utils.h" + +//#include "external/cwalk/cwalk.h" +#include + +inline char ToLower(char c) { return (c >= 'A' && c <= 'Z') ? (c + 'a' - 'A') : c; } + +/* +inline char ToUpper(char c) { return (c >= 'a' && c <= 'z') ? 
(c - ('a' - 'A')) : c; } + +void utils::Lowercase(std::string& path) +{ + for (size_t i = 0, n = path.size(); i != n; ++i) + path[i] = ToLower(path[i]); +} + + +bool utils::BeginsWith(const std::string& str, const std::string& prefix) +{ + if (str.size() < prefix.size()) + return false; + for (size_t i = 0, n = prefix.size(); i != n; ++i) + { + char c1 = ToLower(str[i]); + char c2 = ToLower(prefix[i]); + if (c1 != c2) + return false; + } + return true; +} +*/ + +bool utils::EndsWith(const std::string_view& str, const std::string& suffix) +{ + if (str.size() < suffix.size()) + return false; + size_t start = str.size() - suffix.size(); + for (size_t i = 0, n = suffix.size(); i != n; ++i) + { + char c1 = ToLower(str[i+start]); + char c2 = ToLower(suffix[i]); + if (c1 != c2) + return false; + } + return true; +} + +bool utils::IsHeader(std::string_view path) +{ + path = utils::GetFilename(path); + size_t dot = path.rfind('.'); + if (dot == std::string::npos) + return true; // no extension is likely a header, e.g. 
+ + size_t len = path.size(); + if (dot + 1 < len && (ToLower(path[dot + 1]) == 'h' || ToLower(path[dot + 1]) == 'i')) + return true; // extension starting with .h or .i (h, hpp, hxx, inc etc) likely a header + + return false; +} + +std::string utils::GetNicePath(const std::string_view& path) +{ + char input[FILENAME_MAX]; + size_t len = std::min(path.size(), FILENAME_MAX-1); + memcpy(input, path.data(), len); + input[len] = 0; + char result[FILENAME_MAX]; + + // kram: skip the normalization + // cwk_path_normalize(input, result, sizeof(result)); + strlcpy(result, input, sizeof(result)); + + // convert to forward slashes + char *p = result; + while (*p) + { + if (*p == '\\') + *p = '/'; + ++p; + } + return result; +} + +std::string_view utils::GetFilename(const std::string_view& path) +{ + size_t dirIdx = path.rfind('/'); + if (dirIdx != std::string::npos) + return path.substr(dirIdx + 1, path.size() - dirIdx - 1); + return path; +} diff --git a/kram-profile/CBA/Utils.h b/kram-profile/CBA/Utils.h new file mode 100755 index 00000000..52f5e5f3 --- /dev/null +++ b/kram-profile/CBA/Utils.h @@ -0,0 +1,21 @@ +// Clang Build Analyzer https://github.com/aras-p/ClangBuildAnalyzer +// SPDX-License-Identifier: Unlicense +#pragma once +#include +#include + +namespace utils +{ + [[nodiscard]] std::string GetNicePath(const std::string_view& path); + [[nodiscard]] std::string_view GetFilename(const std::string_view& path); + + [[nodiscard]] bool IsHeader(std::string_view path); + + /* + void Lowercase(std::string& path); + + [[nodiscard]] bool BeginsWith(const std::string& str, const std::string& prefix); + */ + [[nodiscard]] bool EndsWith(const std::string_view& str, const std::string& suffix); + +} diff --git a/kram-profile/CBA/simdjson.cpp b/kram-profile/CBA/simdjson.cpp new file mode 100644 index 00000000..2b7b3132 --- /dev/null +++ b/kram-profile/CBA/simdjson.cpp @@ -0,0 +1,15984 @@ +/* auto-generated on 2022-10-16 16:59:15 +0000. Do not edit! 
*/ +/* begin file src/simdjson.cpp */ +#include "simdjson.h" + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +/* begin file src/to_chars.cpp */ +#include +#include +#include +#include + +namespace simdjson { +namespace internal { +/*! +implements the Grisu2 algorithm for binary to decimal floating-point +conversion. +Adapted from JSON for Modern C++ + +This implementation is a slightly modified version of the reference +implementation which may be obtained from +http://florian.loitsch.com/publications (bench.tar.gz). +The code is distributed under the MIT license, Copyright (c) 2009 Florian +Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing +Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the +ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation, +PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and +Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming +Language Design and Implementation, PLDI 1996 +*/ +namespace dtoa_impl { + +template +Target reinterpret_bits(const Source source) { + static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); + + Target target; + std::memcpy(&target, &source, sizeof(Source)); + return target; +} + +struct diyfp // f * 2^e +{ + static constexpr int kPrecision = 64; // = q + + std::uint64_t f = 0; + int e = 0; + + constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} + + /*! + @brief returns x - y + @pre x.e == y.e and x.f >= y.f + */ + static diyfp sub(const diyfp &x, const diyfp &y) noexcept { + + return {x.f - y.f, x.e}; + } + + /*! + @brief returns x * y + @note The result is rounded. (Only the upper q bits are returned.) 
+ */ + static diyfp mul(const diyfp &x, const diyfp &y) noexcept { + static_assert(kPrecision == 64, "internal error"); + + // Computes: + // f = round((x.f * y.f) / 2^q) + // e = x.e + y.e + q + + // Emulate the 64-bit * 64-bit multiplication: + // + // p = u * v + // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) + // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + + // 2^64 (u_hi v_hi ) = (p0 ) + 2^32 ((p1 ) + (p2 )) + // + 2^64 (p3 ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + + // 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) = + // (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + + // p2_hi + p3) = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) + + // 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) + // + // (Since Q might be larger than 2^32 - 1) + // + // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) + // + // (Q_hi + H does not overflow a 64-bit int) + // + // = p_lo + 2^64 p_hi + + const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; + const std::uint64_t u_hi = x.f >> 32u; + const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; + const std::uint64_t v_hi = y.f >> 32u; + + const std::uint64_t p0 = u_lo * v_lo; + const std::uint64_t p1 = u_lo * v_hi; + const std::uint64_t p2 = u_hi * v_lo; + const std::uint64_t p3 = u_hi * v_hi; + + const std::uint64_t p0_hi = p0 >> 32u; + const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; + const std::uint64_t p1_hi = p1 >> 32u; + const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; + const std::uint64_t p2_hi = p2 >> 32u; + + std::uint64_t Q = p0_hi + p1_lo + p2_lo; + + // The full product might now be computed as + // + // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) + // p_lo = p0_lo + (Q << 32) + // + // But in this particular case here, the full p_lo is not required. + // Effectively we only need to add the highest bit in p_lo to p_hi (and + // Q_hi + 1 does not overflow). + + Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up + + const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); + + return {h, x.e + y.e + 64}; + } + + /*! 
+ @brief normalize x such that the significand is >= 2^(q-1) + @pre x.f != 0 + */ + static diyfp normalize(diyfp x) noexcept { + + while ((x.f >> 63u) == 0) { + x.f <<= 1u; + x.e--; + } + + return x; + } + + /*! + @brief normalize x such that the result has the exponent E + @pre e >= x.e and the upper e - x.e bits of x.f must be zero. + */ + static diyfp normalize_to(const diyfp &x, + const int target_exponent) noexcept { + const int delta = x.e - target_exponent; + + return {x.f << delta, target_exponent}; + } +}; + +struct boundaries { + diyfp w; + diyfp minus; + diyfp plus; +}; + +/*! +Compute the (normalized) diyfp representing the input number 'value' and its +boundaries. +@pre value must be finite and positive +*/ +template boundaries compute_boundaries(FloatType value) { + + // Convert the IEEE representation into a diyfp. + // + // If v is denormal: + // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) + // If v is normalized: + // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) + + static_assert(std::numeric_limits::is_iec559, + "internal error: dtoa_short requires an IEEE-754 " + "floating-point implementation"); + + constexpr int kPrecision = + std::numeric_limits::digits; // = p (includes the hidden bit) + constexpr int kBias = + std::numeric_limits::max_exponent - 1 + (kPrecision - 1); + constexpr int kMinExp = 1 - kBias; + constexpr std::uint64_t kHiddenBit = std::uint64_t{1} + << (kPrecision - 1); // = 2^(p-1) + + using bits_type = typename std::conditional::type; + + const std::uint64_t bits = reinterpret_bits(value); + const std::uint64_t E = bits >> (kPrecision - 1); + const std::uint64_t F = bits & (kHiddenBit - 1); + + const bool is_denormal = E == 0; + const diyfp v = is_denormal + ? diyfp(F, kMinExp) + : diyfp(F + kHiddenBit, static_cast(E) - kBias); + + // Compute the boundaries m- and m+ of the floating-point value + // v = f * 2^e. 
+ // + // Determine v- and v+, the floating-point predecessor and successor if v, + // respectively. + // + // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) + // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) + // + // v+ = v + 2^e + // + // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ + // between m- and m+ round to v, regardless of how the input rounding + // algorithm breaks ties. + // + // ---+-------------+-------------+-------------+-------------+--- (A) + // v- m- v m+ v+ + // + // -----------------+------+------+-------------+-------------+--- (B) + // v- m- v m+ v+ + + const bool lower_boundary_is_closer = F == 0 && E > 1; + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_minus = lower_boundary_is_closer + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) + + // Determine the normalized w+ = m+. + const diyfp w_plus = diyfp::normalize(m_plus); + + // Determine w- = m- such that e_(w-) = e_(w+). + const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); + + return {diyfp::normalize(v), w_minus, w_plus}; +} + +// Given normalized diyfp w, Grisu needs to find a (normalized) cached +// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies +// within a certain range [alpha, gamma] (Definition 3.2 from [1]) +// +// alpha <= e = e_c + e_w + q <= gamma +// +// or +// +// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q +// <= f_c * f_w * 2^gamma +// +// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies +// +// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma +// +// or +// +// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) +// +// The choice of (alpha,gamma) determines the size of the table and the form of +// the digit generation procedure. 
Using (alpha,gamma)=(-60,-32) works out well +// in practice: +// +// The idea is to cut the number c * w = f * 2^e into two parts, which can be +// processed independently: An integral part p1, and a fractional part p2: +// +// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e +// = (f div 2^-e) + (f mod 2^-e) * 2^e +// = p1 + p2 * 2^e +// +// The conversion of p1 into decimal form requires a series of divisions and +// modulos by (a power of) 10. These operations are faster for 32-bit than for +// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be +// achieved by choosing +// +// -e >= 32 or e <= -32 := gamma +// +// In order to convert the fractional part +// +// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... +// +// into decimal form, the fraction is repeatedly multiplied by 10 and the digits +// d[-i] are extracted in order: +// +// (10 * p2) div 2^-e = d[-1] +// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... +// +// The multiplication by 10 must not overflow. It is sufficient to choose +// +// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. +// +// Since p2 = f mod 2^-e < 2^-e, +// +// -e <= 60 or e >= -60 := alpha + +constexpr int kAlpha = -60; +constexpr int kGamma = -32; + +struct cached_power // c = f * 2^e ~= 10^k +{ + std::uint64_t f; + int e; + int k; +}; + +/*! +For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached +power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c +satisfies (Definition 3.2 from [1]) + alpha <= e_c + e + q <= gamma. +*/ +inline cached_power get_cached_power_for_binary_exponent(int e) { + // Now + // + // alpha <= e_c + e + q <= gamma (1) + // ==> f_c * 2^alpha <= c * 2^e * 2^q + // + // and since the c's are normalized, 2^(q-1) <= f_c, + // + // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) + // ==> 2^(alpha - e - 1) <= c + // + // If c were an exact power of ten, i.e. 
c = 10^k, one may determine k as + // + // k = ceil( log_10( 2^(alpha - e - 1) ) ) + // = ceil( (alpha - e - 1) * log_10(2) ) + // + // From the paper: + // "In theory the result of the procedure could be wrong since c is rounded, + // and the computation itself is approximated [...]. In practice, however, + // this simple function is sufficient." + // + // For IEEE double precision floating-point numbers converted into + // normalized diyfp's w = f * 2^e, with q = 64, + // + // e >= -1022 (min IEEE exponent) + // -52 (p - 1) + // -52 (p - 1, possibly normalize denormal IEEE numbers) + // -11 (normalize the diyfp) + // = -1137 + // + // and + // + // e <= +1023 (max IEEE exponent) + // -52 (p - 1) + // -11 (normalize the diyfp) + // = 960 + // + // This binary exponent range [-1137,960] results in a decimal exponent + // range [-307,324]. One does not need to store a cached power for each + // k in this range. For each such k it suffices to find a cached power + // such that the exponent of the product lies in [alpha,gamma]. + // This implies that the difference of the decimal exponents of adjacent + // table entries must be less than or equal to + // + // floor( (gamma - alpha) * log_10(2) ) = 8. + // + // (A smaller distance gamma-alpha would require a larger table.) + + // NB: + // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. 
+ + constexpr int kCachedPowersMinDecExp = -300; + constexpr int kCachedPowersDecStep = 8; + + static constexpr std::array kCachedPowers = {{ + {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292}, + {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276}, + {0xD3515C2831559A83, -954, -268}, {0x9D71AC8FADA6C9B5, -927, -260}, + {0xEA9C227723EE8BCB, -901, -252}, {0xAECC49914078536D, -874, -244}, + {0x823C12795DB6CE57, -847, -236}, {0xC21094364DFB5637, -821, -228}, + {0x9096EA6F3848984F, -794, -220}, {0xD77485CB25823AC7, -768, -212}, + {0xA086CFCD97BF97F4, -741, -204}, {0xEF340A98172AACE5, -715, -196}, + {0xB23867FB2A35B28E, -688, -188}, {0x84C8D4DFD2C63F3B, -661, -180}, + {0xC5DD44271AD3CDBA, -635, -172}, {0x936B9FCEBB25C996, -608, -164}, + {0xDBAC6C247D62A584, -582, -156}, {0xA3AB66580D5FDAF6, -555, -148}, + {0xF3E2F893DEC3F126, -529, -140}, {0xB5B5ADA8AAFF80B8, -502, -132}, + {0x87625F056C7C4A8B, -475, -124}, {0xC9BCFF6034C13053, -449, -116}, + {0x964E858C91BA2655, -422, -108}, {0xDFF9772470297EBD, -396, -100}, + {0xA6DFBD9FB8E5B88F, -369, -92}, {0xF8A95FCF88747D94, -343, -84}, + {0xB94470938FA89BCF, -316, -76}, {0x8A08F0F8BF0F156B, -289, -68}, + {0xCDB02555653131B6, -263, -60}, {0x993FE2C6D07B7FAC, -236, -52}, + {0xE45C10C42A2B3B06, -210, -44}, {0xAA242499697392D3, -183, -36}, + {0xFD87B5F28300CA0E, -157, -28}, {0xBCE5086492111AEB, -130, -20}, + {0x8CBCCC096F5088CC, -103, -12}, {0xD1B71758E219652C, -77, -4}, + {0x9C40000000000000, -50, 4}, {0xE8D4A51000000000, -24, 12}, + {0xAD78EBC5AC620000, 3, 20}, {0x813F3978F8940984, 30, 28}, + {0xC097CE7BC90715B3, 56, 36}, {0x8F7E32CE7BEA5C70, 83, 44}, + {0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60}, + {0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76}, + {0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92}, + {0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108}, + {0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124}, + {0xB454E4A179DD1877, 375, 
132}, {0x865B86925B9BC5C2, 402, 140}, + {0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156}, + {0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172}, + {0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188}, + {0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204}, + {0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220}, + {0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236}, + {0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252}, + {0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268}, + {0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284}, + {0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300}, + {0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316}, + {0x9E19DB92B4E31BA9, 1013, 324}, + }}; + + // This computation gives exactly the same results for k as + // k = ceil((kAlpha - e - 1) * 0.30102999566398114) + // for |e| <= 1500, but doesn't require floating-point operations. + // NB: log_10(2) ~= 78913 / 2^18 + const int f = kAlpha - e - 1; + const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); + + const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / + kCachedPowersDecStep; + + const cached_power cached = kCachedPowers[static_cast(index)]; + + return cached; +} + +/*! +For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. +For n == 0, returns 1 and sets pow10 := 1. 
+*/ +inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) { + // LCOV_EXCL_START + if (n >= 1000000000) { + pow10 = 1000000000; + return 10; + } + // LCOV_EXCL_STOP + else if (n >= 100000000) { + pow10 = 100000000; + return 9; + } else if (n >= 10000000) { + pow10 = 10000000; + return 8; + } else if (n >= 1000000) { + pow10 = 1000000; + return 7; + } else if (n >= 100000) { + pow10 = 100000; + return 6; + } else if (n >= 10000) { + pow10 = 10000; + return 5; + } else if (n >= 1000) { + pow10 = 1000; + return 4; + } else if (n >= 100) { + pow10 = 100; + return 3; + } else if (n >= 10) { + pow10 = 10; + return 2; + } else { + pow10 = 1; + return 1; + } +} + +inline void grisu2_round(char *buf, int len, std::uint64_t dist, + std::uint64_t delta, std::uint64_t rest, + std::uint64_t ten_k) { + + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // ten_k + // <------> + // <---- rest ----> + // --------------[------------------+----+--------------]-------------- + // w V + // = buf * 10^k + // + // ten_k represents a unit-in-the-last-place in the decimal representation + // stored in buf. + // Decrement buf by ten_k while this takes buf closer to w. + + // The tests are written in this order to avoid overflow in unsigned + // integer arithmetic. + + while (rest < dist && delta - rest >= ten_k && + (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { + buf[len - 1]--; + rest += ten_k; + } +} + +/*! +Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. +M- and M+ must be normalized and share the same exponent -60 <= e <= -32. 
+*/ +inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent, + diyfp M_minus, diyfp w, diyfp M_plus) { + static_assert(kAlpha >= -60, "internal error"); + static_assert(kGamma <= -32, "internal error"); + + // Generates the digits (and the exponent) of a decimal floating-point + // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's + // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= + // gamma. + // + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // Grisu2 generates the digits of M+ from left to right and stops as soon as + // V is in [M-,M+]. + + std::uint64_t delta = + diyfp::sub(M_plus, M_minus) + .f; // (significand of (M+ - M-), implicit exponent is e) + std::uint64_t dist = + diyfp::sub(M_plus, w) + .f; // (significand of (M+ - w ), implicit exponent is e) + + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e + + const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); + + auto p1 = static_cast( + M_plus.f >> + -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
+ std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e + + // 1) + // + // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] + + std::uint32_t pow10; + const int k = find_largest_pow10(p1, pow10); + + // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) + // + // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) + // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) + // + // M+ = p1 + p2 * 2^e + // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e + // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e + // = d[k-1] * 10^(k-1) + ( rest) * 2^e + // + // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) + // + // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] + // + // but stop as soon as + // + // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e + + int n = k; + while (n > 0) { + // Invariants: + // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) + // pow10 = 10^(n-1) <= p1 < 10^n + // + const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) + const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) + // + // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e + // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) + // + p1 = r; + n--; + // + // M+ = buffer * 10^n + (p1 + p2 * 2^e) + // pow10 = 10^n + // + + // Now check if enough digits have been generated. + // Compute + // + // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e + // + // Note: + // Since rest and delta share the same exponent e, it suffices to + // compare the significands. + const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; + if (rest <= delta) { + // V = buffer * 10^n, with M- <= V <= M+. + + decimal_exponent += n; + + // We may now just stop. But instead look if the buffer could be + // decremented to bring V closer to w. + // + // pow10 = 10^n is now 1 ulp in the decimal representation V. 
+ // The rounding procedure works with diyfp's with an implicit + // exponent of e. + // + // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e + // + const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; + grisu2_round(buffer, length, dist, delta, rest, ten_n); + + return; + } + + pow10 /= 10; + // + // pow10 = 10^(n-1) <= p1 < 10^n + // Invariants restored. + } + + // 2) + // + // The digits of the integral part have been generated: + // + // M+ = d[k-1]...d[1]d[0] + p2 * 2^e + // = buffer + p2 * 2^e + // + // Now generate the digits of the fractional part p2 * 2^e. + // + // Note: + // No decimal point is generated: the exponent is adjusted instead. + // + // p2 actually represents the fraction + // + // p2 * 2^e + // = p2 / 2^-e + // = d[-1] / 10^1 + d[-2] / 10^2 + ... + // + // Now generate the digits d[-m] of p2 from left to right (m = 1,2,...) + // + // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m + // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) + // + // using + // + // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) + // = ( d) * 2^-e + ( r) + // + // or + // 10^m * p2 * 2^e = d + r * 2^e + // + // i.e. + // + // M+ = buffer + p2 * 2^e + // = buffer + 10^-m * (d + r * 2^e) + // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e + // + // and stop as soon as 10^-m * r * 2^e <= delta * 2^e + + int m = 0; + for (;;) { + // Invariant: + // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...)
+ // * 2^e + // = buffer * 10^-m + 10^-m * (p2 ) + // * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e = + // buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + + // (10*p2 mod 2^-e)) * 2^e + // + p2 *= 10; + const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e + const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e + // + // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) + // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + p2 = r; + m++; + // + // M+ = buffer * 10^-m + 10^-m * p2 * 2^e + // Invariant restored. + + // Check if enough digits have been generated. + // + // 10^-m * p2 * 2^e <= delta * 2^e + // p2 * 2^e <= 10^m * delta * 2^e + // p2 <= 10^m * delta + delta *= 10; + dist *= 10; + if (p2 <= delta) { + break; + } + } + + // V = buffer * 10^-m, with M- <= V <= M+. + + decimal_exponent -= m; + + // 1 ulp in the decimal representation is now 10^-m. + // Since delta and dist are now scaled by 10^m, we need to do the + // same with ulp in order to keep the units in sync. + // + // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e + // + const std::uint64_t ten_m = one.f; + grisu2_round(buffer, length, dist, delta, p2, ten_m); + + // By construction this algorithm generates the shortest possible decimal + // number (Loitsch, Theorem 6.2) which rounds back to w. + // For an input number of precision p, at least + // + // N = 1 + ceil(p * log_10(2)) + // + // decimal digits are sufficient to identify all binary floating-point + // numbers (Matula, "In-and-Out conversions"). + // This implies that the algorithm does not produce more than N decimal + // digits. + // + // N = 17 for p = 53 (IEEE double precision) + // N = 9 for p = 24 (IEEE single precision) +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus, + diyfp v, diyfp m_plus) { + + // --------(-----------------------+-----------------------)-------- (A) + // m- v m+ + // + // --------------------(-----------+-----------------------)-------- (B) + // m- v m+ + // + // First scale v (and m- and m+) such that the exponent is in the range + // [alpha, gamma]. + + const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); + + const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k + + // The exponent of the products is = v.e + c_minus_k.e + q and is in the range + // [alpha,gamma] + const diyfp w = diyfp::mul(v, c_minus_k); + const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); + const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); + + // ----(---+---)---------------(---+---)---------------(---+---)---- + // w- w w+ + // = c*m- = c*v = c*m+ + // + // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and + // w+ are now off by a small amount. + // In fact: + // + // w - v * 10^k < 1 ulp + // + // To account for this inaccuracy, add resp. subtract 1 ulp. + // + // --------+---[---------------(---+---)---------------]---+-------- + // w- M- w M+ w+ + // + // Now any number in [M-, M+] (bounds included) will round to w when input, + // regardless of how the input rounding algorithm breaks ties. + // + // And digit_gen generates the shortest possible such number in [M-, M+]. + // Note that this does not mean that Grisu2 always generates the shortest + // possible number in the interval (m-, m+). + const diyfp M_minus(w_minus.f + 1, w_minus.e); + const diyfp M_plus(w_plus.f - 1, w_plus.e); + + decimal_exponent = -cached.k; // = -(-k) = k + + grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); +} + +/*! 
+v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +template +void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { + static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, + "internal error: not enough precision"); + + // If the neighbors (and boundaries) of 'value' are always computed for + // double-precision numbers, all float's can be recovered using strtod (and + // strtof). However, the resulting decimal representations are not exactly + // "short". + // + // The documentation for 'std::to_chars' + // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is + // converted to a string as if by std::sprintf in the default ("C") locale" + // and since sprintf promotes float's to double's, I think this is exactly + // what 'std::to_chars' does. On the other hand, the documentation for + // 'std::to_chars' requires that "parsing the representation using the + // corresponding std::from_chars function recovers value exactly". That + // indicates that single precision floating-point numbers should be recovered + // using 'std::strtof'. + // + // NB: If the neighbors are computed for single-precision numbers, there is a + // single float + // (7.0385307e-26f) which can't be recovered using strtod. The resulting + // double precision value is off by 1 ulp. +#if 0 + const boundaries w = compute_boundaries(static_cast(value)); +#else + const boundaries w = compute_boundaries(value); +#endif + + grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); +} + +/*! +@brief appends a decimal representation of e to buf +@return a pointer to the element following the exponent. +@pre -1000 < e < 1000 +*/ +inline char *append_exponent(char *buf, int e) { + + if (e < 0) { + e = -e; + *buf++ = '-'; + } else { + *buf++ = '+'; + } + + auto k = static_cast(e); + if (k < 10) { + // Always print at least two digits in the exponent. 
+ // This is for compatibility with printf("%g"). + *buf++ = '0'; + *buf++ = static_cast('0' + k); + } else if (k < 100) { + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } else { + *buf++ = static_cast('0' + k / 100); + k %= 100; + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } + + return buf; +} + +/*! +@brief prettify v = buf * 10^decimal_exponent +If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point +notation. Otherwise it will be printed in exponential notation. +@pre min_exp < 0 +@pre max_exp > 0 +*/ +inline char *format_buffer(char *buf, int len, int decimal_exponent, + int min_exp, int max_exp) { + + const int k = len; + const int n = len + decimal_exponent; + + // v = buf * 10^(n-k) + // k is the length of the buffer (number of decimal digits) + // n is the position of the decimal point relative to the start of the buffer. + + if (k <= n && n <= max_exp) { + // digits[000] + // len <= max_exp + 2 + + std::memset(buf + k, '0', static_cast(n) - static_cast(k)); + // Make it look like a floating-point number (#362, #378) + // buf[n + 0] = '.'; + // buf[n + 1] = '0'; + return buf + (static_cast(n)); + } + + if (0 < n && n <= max_exp) { + // dig.its + // len <= max_digits10 + 1 + std::memmove(buf + (static_cast(n) + 1), buf + n, + static_cast(k) - static_cast(n)); + buf[n] = '.'; + return buf + (static_cast(k) + 1U); + } + + if (min_exp < n && n <= 0) { + // 0.[000]digits + // len <= 2 + (-min_exp - 1) + max_digits10 + + std::memmove(buf + (2 + static_cast(-n)), buf, + static_cast(k)); + buf[0] = '0'; + buf[1] = '.'; + std::memset(buf + 2, '0', static_cast(-n)); + return buf + (2U + static_cast(-n) + static_cast(k)); + } + + if (k == 1) { + // dE+123 + // len <= 1 + 5 + + buf += 1; + } else { + // d.igitsE+123 + // len <= max_digits10 + 1 + 5 + + std::memmove(buf + 2, buf + 1, static_cast(k) - 1); + buf[1] = '.'; + buf += 1 + static_cast(k); + } + + *buf++ = 'e'; 
+ return append_exponent(buf, n - 1); +} + +} // namespace dtoa_impl + +/*! +The format of the resulting decimal representation is similar to printf's %g +format. Returns an iterator pointing past-the-end of the decimal representation. +@note The input number must be finite, i.e. NaN's and Inf's are not supported. +@note The buffer must be large enough. +@note The result is NOT null-terminated. +*/ +char *to_chars(char *first, const char *last, double value) { + static_cast(last); // maybe unused - fix warning + bool negative = std::signbit(value); + if (negative) { + value = -value; + *first++ = '-'; + } + + if (value == 0) // +-0 + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) + if(negative) { + *first++ = '.'; + *first++ = '0'; + } + return first; + } + // Compute v = buffer * 10^decimal_exponent. + // The decimal digits are stored in the buffer, which needs to be interpreted + // as an unsigned decimal integer. + // len is the length of the buffer, i.e. the number of decimal digits. + int len = 0; + int decimal_exponent = 0; + dtoa_impl::grisu2(first, len, decimal_exponent, value); + // Format the buffer like printf("%.*g", prec, value) + constexpr int kMinExp = -4; + constexpr int kMaxExp = std::numeric_limits::digits10; + + return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, + kMaxExp); +} +} // namespace internal +} // namespace simdjson +/* end file src/to_chars.cpp */ +/* begin file src/from_chars.cpp */ +#include +namespace simdjson { +namespace internal { + +/** + * The code in the internal::from_chars function is meant to handle the floating-point number parsing + * when we have more than 19 digits in the decimal mantissa. This should only be seen + * in adversarial scenarios: we do not expect production systems to even produce + * such floating-point numbers. 
+ * + * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) + * who credits Ken Thompson for the design (via a reference to the Go source + * code). See + * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c + * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c + * It is probably not very fast but it is a fallback that should almost never be + * called in real life. Google Wuffs is published under the Apache License 2.0. + **/ + +namespace { +constexpr uint32_t max_digits = 768; +constexpr int32_t decimal_point_range = 2047; +} // namespace + +struct adjusted_mantissa { + uint64_t mantissa; + int power2; + adjusted_mantissa() : mantissa(0), power2(0) {} +}; + +struct decimal { + uint32_t num_digits; + int32_t decimal_point; + bool negative; + bool truncated; + uint8_t digits[max_digits]; +}; + +template struct binary_format { + static constexpr int mantissa_explicit_bits(); + static constexpr int minimum_exponent(); + static constexpr int infinite_power(); + static constexpr int sign_index(); +}; + +template <> constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} + +template <> constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> constexpr int binary_format::infinite_power() { + return 0x7FF; +} + +template <> constexpr int binary_format::sign_index() { return 63; } + +bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } + +// This should always succeed since it follows a call to parse_number.
+decimal parse_decimal(const char *&p) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while (*p == '0') { + ++p; + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if (*p == '.') { + ++p; + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while (is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? -exp_number : exp_number); + } + return answer; +} + +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. 
+decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); + } + return answer; +} + +namespace { + +// remove all final zeroes +inline void trim(decimal &h) { + while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { + h.num_digits--; + } +} + +uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { + shift &= 63; + const static uint16_t number_of_digits_decimal_left_shift_table[65] = { + 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, + 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, + 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, + 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, + 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, + 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, + 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, + 0x051C, 0x051C, + }; + uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; + uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; + uint32_t num_new_digits = x_a >> 11; + uint32_t pow5_a = 0x7FF & x_a; + uint32_t pow5_b = 0x7FF & x_b; + const static uint8_t + number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { + 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, + 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, + 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, + 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, + 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, + 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, + 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, + 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, + 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, + 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, + 8, 7, 6, 9, 5, 3, 1, 2, 
5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, + 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, + 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, + 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, + 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, + 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, + 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, + 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, + 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, + 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, + 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, + 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, + 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, + 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, + 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, + 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, + 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, + 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, + 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, + 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, + 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, + 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, + 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, + 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, + 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, + 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, + 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, + 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, + 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 
4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, + 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, + 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, + 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, + 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, + 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, + 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, + 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, + 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, + 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, + 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, + 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, + 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, + 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, + 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, + 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, + 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, + 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, + 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, + }; + const uint8_t *pow5 = + &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; + uint32_t i = 0; + uint32_t n = pow5_b - pow5_a; + for (; i < n; i++) { + if (i >= h.num_digits) { + return num_new_digits - 1; + } else if (h.digits[i] == pow5[i]) { + continue; + } else if (h.digits[i] < pow5[i]) { + return num_new_digits - 1; + } else { + return num_new_digits; + } + } + return num_new_digits; +} + +} // end of anonymous namespace + +uint64_t round(decimal &h) { + if ((h.num_digits == 0) || (h.decimal_point < 0)) { + return 0; + } else if (h.decimal_point > 18) { + return UINT64_MAX; + } + // at this point, we know that h.decimal_point >= 0 + uint32_t dp = 
uint32_t(h.decimal_point); + uint64_t n = 0; + for (uint32_t i = 0; i < dp; i++) { + n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); + } + bool round_up = false; + if (dp < h.num_digits) { + round_up = h.digits[dp] >= 5; // normally, we round up + // but we may need to round to even! + if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { + round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); + } + } + if (round_up) { + n++; + } + return n; +} + +// computes h * 2^-shift +void decimal_left_shift(decimal &h, uint32_t shift) { + if (h.num_digits == 0) { + return; + } + uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); + int32_t read_index = int32_t(h.num_digits - 1); + uint32_t write_index = h.num_digits - 1 + num_new_digits; + uint64_t n = 0; + + while (read_index >= 0) { + n += uint64_t(h.digits[read_index]) << shift; + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + read_index--; + } + while (n > 0) { + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + } + h.num_digits += num_new_digits; + if (h.num_digits > max_digits) { + h.num_digits = max_digits; + } + h.decimal_point += int32_t(num_new_digits); + trim(h); +} + +// computes h * 2^shift +void decimal_right_shift(decimal &h, uint32_t shift) { + uint32_t read_index = 0; + uint32_t write_index = 0; + + uint64_t n = 0; + + while ((n >> shift) == 0) { + if (read_index < h.num_digits) { + n = (10 * n) + h.digits[read_index++]; + } else if (n == 0) { + return; + } else { + while ((n >> shift) == 0) { + n = 10 * n; + read_index++; + } + break; + } + } + h.decimal_point -= int32_t(read_index - 1); 
+ if (h.decimal_point < -decimal_point_range) { // it is zero + h.num_digits = 0; + h.decimal_point = 0; + h.negative = false; + h.truncated = false; + return; + } + uint64_t mask = (uint64_t(1) << shift) - 1; + while (read_index < h.num_digits) { + uint8_t new_digit = uint8_t(n >> shift); + n = (10 * (n & mask)) + h.digits[read_index++]; + h.digits[write_index++] = new_digit; + } + while (n > 0) { + uint8_t new_digit = uint8_t(n >> shift); + n = 10 * (n & mask); + if (write_index < max_digits) { + h.digits[write_index++] = new_digit; + } else if (new_digit > 0) { + h.truncated = true; + } + } + h.num_digits = write_index; + trim(h); +} + +template adjusted_mantissa compute_float(decimal &d) { + adjusted_mantissa answer; + if (d.num_digits == 0) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + // At this point, going further, we can assume that d.num_digits > 0. + // We want to guard against excessive decimal point values because + // they can result in long running times. Indeed, we do + // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 + // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not + // fine (runs for a long time). + // + if(d.decimal_point < -324) { + // We have something smaller than 1e-324 which is always zero + // in binary64 and binary32. + // It should be zero. + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } else if(d.decimal_point >= 310) { + // We have something at least as large as 0.1e310 which is + // always infinite. + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + static const uint32_t max_shift = 60; + static const uint32_t num_powers = 19; + static const uint8_t powers[19] = { + 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // + 33, 36, 39, 43, 46, 49, 53, 56, 59, // + }; + int32_t exp2 = 0; + while (d.decimal_point > 0) { + uint32_t n = uint32_t(d.decimal_point); + uint32_t shift = (n < num_powers) ? 
powers[n] : max_shift; + decimal_right_shift(d, shift); + if (d.decimal_point < -decimal_point_range) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + exp2 += int32_t(shift); + } + // We shift left toward [1/2 ... 1]. + while (d.decimal_point <= 0) { + uint32_t shift; + if (d.decimal_point == 0) { + if (d.digits[0] >= 5) { + break; + } + shift = (d.digits[0] < 2) ? 2 : 1; + } else { + uint32_t n = uint32_t(-d.decimal_point); + shift = (n < num_powers) ? powers[n] : max_shift; + } + decimal_left_shift(d, shift); + if (d.decimal_point > decimal_point_range) { + // we want to get infinity: + answer.power2 = 0xFF; + answer.mantissa = 0; + return answer; + } + exp2 -= int32_t(shift); + } + // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. + exp2--; + constexpr int32_t minimum_exponent = binary::minimum_exponent(); + while ((minimum_exponent + 1) > exp2) { + uint32_t n = uint32_t((minimum_exponent + 1) - exp2); + if (n > max_shift) { + n = max_shift; + } + decimal_right_shift(d, n); + exp2 += int32_t(n); + } + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + + const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; + decimal_left_shift(d, mantissa_size_in_bits); + + uint64_t mantissa = round(d); + // It is possible that we have an overflow, in which case we need + // to shift back. 
+ if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { + decimal_right_shift(d, 1); + exp2 += 1; + mantissa = round(d); + if ((exp2 - minimum_exponent) >= binary::infinite_power()) { + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + } + answer.power2 = exp2 - binary::minimum_exponent(); + if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { + answer.power2--; + } + answer.mantissa = + mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); + return answer; +} + +template +adjusted_mantissa parse_long_mantissa(const char *first) { + decimal d = parse_decimal(first); + return compute_float(d); +} + +template +adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { + decimal d = parse_decimal(first, end); + return compute_float(d); +} + +double from_chars(const char *first) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + + +double from_chars(const char *first, const char *end) noexcept { + bool negative = first[0] == '-'; + if (negative) { + first++; + } + adjusted_mantissa am = parse_long_mantissa>(first, end); + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) + << binary_format::mantissa_explicit_bits(); + word = negative ? 
word | (uint64_t(1) << binary_format::sign_index()) + : word; + double value; + std::memcpy(&value, &word, sizeof(double)); + return value; +} + +} // internal +} // simdjson +/* end file src/from_chars.cpp */ +/* begin file src/internal/error_tables.cpp */ + +namespace simdjson { +namespace internal { + + SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[] { + { SUCCESS, "No error" }, + { CAPACITY, "This parser can't support a document that big" }, + { MEMALLOC, "Error allocating memory, we're most likely out of memory" }, + { TAPE_ERROR, "The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, + { DEPTH_ERROR, "The JSON document was too deep (too many nested objects and arrays)" }, + { STRING_ERROR, "Problem while parsing a string" }, + { T_ATOM_ERROR, "Problem while parsing an atom starting with the letter 't'" }, + { F_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'f'" }, + { N_ATOM_ERROR, "Problem while parsing an atom starting with the letter 'n'" }, + { NUMBER_ERROR, "Problem while parsing a number" }, + { UTF8_ERROR, "The input is not valid UTF-8" }, + { UNINITIALIZED, "Uninitialized" }, + { EMPTY, "Empty: no JSON found" }, + { UNESCAPED_CHARS, "Within strings, some characters must be escaped, we found unescaped characters" }, + { UNCLOSED_STRING, "A string is opened, but never closed." }, + { UNSUPPORTED_ARCHITECTURE, "simdjson does not have an implementation supported by this CPU architecture (perhaps it's a non-SIMD CPU?)." }, + { INCORRECT_TYPE, "The JSON element does not have the requested type." }, + { NUMBER_OUT_OF_RANGE, "The JSON number is too large or too small to fit within the requested type." }, + { INDEX_OUT_OF_BOUNDS, "Attempted to access an element of a JSON array that is beyond its length." }, + { NO_SUCH_FIELD, "The JSON field referenced does not exist in this object." }, + { IO_ERROR, "Error reading the file." 
}, + { INVALID_JSON_POINTER, "Invalid JSON pointer syntax." }, + { INVALID_URI_FRAGMENT, "Invalid URI fragment syntax." }, + { UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as you may have found a bug in simdjson" }, + { PARSER_IN_USE, "Cannot parse a new document while a document is still in use." }, + { OUT_OF_ORDER_ITERATION, "Objects and arrays can only be iterated when they are first encountered." }, + { INSUFFICIENT_PADDING, "simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "JSON document ended early in the middle of an object or array." }, + { SCALAR_DOCUMENT_AS_VALUE, "A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, + { OUT_OF_BOUNDS, "Attempted to access location outside of document."}, + { TRAILING_CONTENT, "Unexpected trailing content in the JSON input."} + }; // error_messages[] + +} // namespace internal +} // namespace simdjson +/* end file src/internal/error_tables.cpp */ +/* begin file src/internal/jsoncharutils_tables.cpp */ + +namespace simdjson { +namespace internal { + +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 0, 1, 0, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 
0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa, + 0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe, + 0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x10, 0x20, 0x30, 0x40, 0x50, + 0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0, + 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, + 0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x100, 0x200, 0x300, 0x400, 0x500, + 0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00, + 0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00, + 0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, + 0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000, + 0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000, + 0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + +} // namespace internal +} // namespace simdjson +/* end file src/internal/jsoncharutils_tables.cpp */ +/* begin file src/internal/numberparsing_tables.cpp */ + +namespace simdjson { 
+namespace internal { + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 
0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 
0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 
0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 
0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 
0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 
0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 
0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 
0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 
0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 
0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 
0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 
0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 
0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; + +} // namespace internal +} // namespace simdjson +/* end file src/internal/numberparsing_tables.cpp */ +/* begin file src/internal/simdprune_tables.cpp */ +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // 
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable
+// BitsSetTable256mul2[i] holds twice the number of set bits in the byte value i
+// (index 0 -> 0, index 1 -> 2, index 3 -> 4, ..., index 255 -> 16).
+SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256] = {
+    0,  2,  2,  4,  2,  4,  4,  6,  2,  4,  4,  6,  4,  6,  6,  8,  2,  4,  4,
+    6,  4,  6,  6,  8,  4,  6,  6,  8,  6,  8,  8,  10, 2,  4,  4,  6,  4,  6,
+    6,  8,  4,  6,  6,  8,  6,  8,  8,  10, 4,  6,  6,  8,  6,  8,  8,  10, 6,
+    8,  8,  10, 8,  10, 10, 12, 2,  4,  4,  6,  4,  6,  6,  8,  4,  6,  6,  8,
+    6,  8,  8,  10, 4,  6,  6,  8,  6,  8,  8,  10, 6,  8,  8,  10, 8,  10, 10,
+    12, 4,  6,  6,  8,  6,  8,  8,  10, 6,  8,  8,  10, 8,  10, 10, 12, 6,  8,
+    8,  10, 8,  10, 10, 12, 8,  10, 10, 12, 10, 12, 12, 14, 2,  4,  4,  6,  4,
+    6,  6,  8,  4,  6,  6,  8,  6,  8,  8,  10, 4,  6,  6,  8,  6,  8,  8,  10,
+    6,  8,  8,  10, 8,  10, 10, 12, 4,  6,  6,  8,  6,  8,  8,  10, 6,  8,  8,
+    10, 8,  10, 10, 12, 6,  8,  8,  10, 8,  10, 10, 12, 8,  10, 10, 12, 10, 12,
+    12, 14, 4,  6,  6,  8,  6,  8,  8,  10, 6,  8,  8,  10, 8,  10, 10, 12, 6,
+    8,  8,  10, 8,  10, 10, 12, 8,  10, 10, 12, 10, 12, 12, 14, 6,  8,  8,  10,
+    8,  10, 10, 12, 8,  10, 10, 12, 10, 12, 12, 14, 8,  10, 10, 12, 10, 12, 12,
+    14, 10, 12, 12, 14, 12, 14, 14, 16};
+
+// Shuffle-index rows, 16 bytes each. Only the first 144 bytes (nine rows) are
+// spelled out; the remaining bytes of the 272-byte array are zero-initialized.
+// Row k (k = 0..8) keeps indices 0..(7-k), then indices 8..15, then k bytes of
+// 0xff: row 0 is the identity, row 8 is 08..0f followed by eight 0xff.
+// NOTE(review): presumably used as a byte-shuffle mask that closes the gap
+// between two 8-byte halves after k bytes were dropped from the low half;
+// the consumers are outside this chunk -- confirm there.
+SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = {
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+    0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
+    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03,
+    0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+    0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
+    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08,
+    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+    0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b,
+    0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+
+// 256 * 8 bytes = 2kB, easily fits in cache.
+// Entry m (m = 0..255) packs, one index per byte starting at the least
+// significant byte, the positions 0..7 of the bits that are *clear* in m, in
+// ascending order; unused high bytes are zero. Examples from the table:
+// m = 0 -> 0x0706050403020100 (all eight kept), m = 1 -> 0x0007060504030201
+// (index 0 dropped), m = 255 -> 0 (nothing kept).
+SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = {
+    0x0706050403020100, 0x0007060504030201, 0x0007060504030200,
+    0x0000070605040302, 0x0007060504030100, 0x0000070605040301,
+    0x0000070605040300, 0x0000000706050403, 0x0007060504020100,
+    0x0000070605040201, 0x0000070605040200, 0x0000000706050402,
+    0x0000070605040100, 0x0000000706050401, 0x0000000706050400,
+    0x0000000007060504, 0x0007060503020100, 0x0000070605030201,
+    0x0000070605030200, 0x0000000706050302, 0x0000070605030100,
+    0x0000000706050301, 0x0000000706050300, 0x0000000007060503,
+    0x0000070605020100, 0x0000000706050201, 0x0000000706050200,
+    0x0000000007060502, 0x0000000706050100, 0x0000000007060501,
+    0x0000000007060500, 0x0000000000070605, 0x0007060403020100,
+    0x0000070604030201, 0x0000070604030200, 0x0000000706040302,
+    0x0000070604030100, 0x0000000706040301, 0x0000000706040300,
+    0x0000000007060403, 0x0000070604020100, 0x0000000706040201,
+    0x0000000706040200, 0x0000000007060402, 0x0000000706040100,
+    0x0000000007060401, 0x0000000007060400, 0x0000000000070604,
+    0x0000070603020100, 0x0000000706030201, 0x0000000706030200,
+    0x0000000007060302, 0x0000000706030100, 0x0000000007060301,
+    0x0000000007060300, 0x0000000000070603, 0x0000000706020100,
+    0x0000000007060201, 0x0000000007060200, 0x0000000000070602,
+    0x0000000007060100, 0x0000000000070601, 0x0000000000070600,
+    0x0000000000000706, 0x0007050403020100, 0x0000070504030201,
+    0x0000070504030200, 0x0000000705040302, 0x0000070504030100,
+    0x0000000705040301, 0x0000000705040300, 0x0000000007050403,
+    0x0000070504020100, 0x0000000705040201, 0x0000000705040200,
+    0x0000000007050402, 0x0000000705040100, 0x0000000007050401,
+    0x0000000007050400, 0x0000000000070504, 0x0000070503020100,
+    0x0000000705030201, 0x0000000705030200, 0x0000000007050302,
+    0x0000000705030100, 0x0000000007050301, 0x0000000007050300,
+    0x0000000000070503, 0x0000000705020100, 0x0000000007050201,
+    0x0000000007050200, 0x0000000000070502, 0x0000000007050100,
+    0x0000000000070501, 0x0000000000070500, 0x0000000000000705,
+    0x0000070403020100, 0x0000000704030201, 0x0000000704030200,
+    0x0000000007040302, 0x0000000704030100, 0x0000000007040301,
+    0x0000000007040300, 0x0000000000070403, 0x0000000704020100,
+    0x0000000007040201, 0x0000000007040200, 0x0000000000070402,
+    0x0000000007040100, 0x0000000000070401, 0x0000000000070400,
+    0x0000000000000704, 0x0000000703020100, 0x0000000007030201,
+    0x0000000007030200, 0x0000000000070302, 0x0000000007030100,
+    0x0000000000070301, 0x0000000000070300, 0x0000000000000703,
+    0x0000000007020100, 0x0000000000070201, 0x0000000000070200,
+    0x0000000000000702, 0x0000000000070100, 0x0000000000000701,
+    0x0000000000000700, 0x0000000000000007, 0x0006050403020100,
+    0x0000060504030201, 0x0000060504030200, 0x0000000605040302,
+    0x0000060504030100, 0x0000000605040301, 0x0000000605040300,
+    0x0000000006050403, 0x0000060504020100, 0x0000000605040201,
+    0x0000000605040200, 0x0000000006050402, 0x0000000605040100,
+    0x0000000006050401, 0x0000000006050400, 0x0000000000060504,
+    0x0000060503020100, 0x0000000605030201, 0x0000000605030200,
+    0x0000000006050302, 0x0000000605030100, 0x0000000006050301,
+    0x0000000006050300, 0x0000000000060503, 0x0000000605020100,
+    0x0000000006050201, 0x0000000006050200, 0x0000000000060502,
+    0x0000000006050100, 0x0000000000060501, 0x0000000000060500,
+    0x0000000000000605, 0x0000060403020100, 0x0000000604030201,
+    0x0000000604030200, 0x0000000006040302, 0x0000000604030100,
+    0x0000000006040301, 0x0000000006040300, 0x0000000000060403,
+    0x0000000604020100, 0x0000000006040201, 0x0000000006040200,
+    0x0000000000060402, 0x0000000006040100, 0x0000000000060401,
+    0x0000000000060400, 0x0000000000000604, 0x0000000603020100,
+    0x0000000006030201, 0x0000000006030200, 0x0000000000060302,
+    0x0000000006030100, 0x0000000000060301, 0x0000000000060300,
+    0x0000000000000603, 0x0000000006020100, 0x0000000000060201,
+    0x0000000000060200, 0x0000000000000602, 0x0000000000060100,
+    0x0000000000000601, 0x0000000000000600, 0x0000000000000006,
+    0x0000050403020100, 0x0000000504030201, 0x0000000504030200,
+    0x0000000005040302, 0x0000000504030100, 0x0000000005040301,
+    0x0000000005040300, 0x0000000000050403, 0x0000000504020100,
+    0x0000000005040201, 0x0000000005040200, 0x0000000000050402,
+    0x0000000005040100, 0x0000000000050401, 0x0000000000050400,
+    0x0000000000000504, 0x0000000503020100, 0x0000000005030201,
+    0x0000000005030200, 0x0000000000050302, 0x0000000005030100,
+    0x0000000000050301, 0x0000000000050300, 0x0000000000000503,
+    0x0000000005020100, 0x0000000000050201, 0x0000000000050200,
+    0x0000000000000502, 0x0000000000050100, 0x0000000000000501,
+    0x0000000000000500, 0x0000000000000005, 0x0000000403020100,
+    0x0000000004030201, 0x0000000004030200, 0x0000000000040302,
+    0x0000000004030100, 0x0000000000040301, 0x0000000000040300,
+    0x0000000000000403, 0x0000000004020100, 0x0000000000040201,
+    0x0000000000040200, 0x0000000000000402, 0x0000000000040100,
+    0x0000000000000401, 0x0000000000000400, 0x0000000000000004,
+    0x0000000003020100, 0x0000000000030201, 0x0000000000030200,
+    0x0000000000000302, 0x0000000000030100, 0x0000000000000301,
+    0x0000000000000300, 0x0000000000000003, 0x0000000000020100,
+    0x0000000000000201, 0x0000000000000200, 0x0000000000000002,
+    0x0000000000000100, 0x0000000000000001, 0x0000000000000000,
+    0x0000000000000000,
+}; //static uint64_t thintable_epi8[256]
+
+} // namespace internal
+} // namespace simdjson
+
+#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64
+/* end file src/internal/simdprune_tables.cpp */
+/* begin file src/implementation.cpp */
+// NOTE(review): the #include below has no header name, and several template
+// argument lists further down (std::unique_ptr, std::initializer_list,
+// internal::atomic_ptr, reinterpret_cast) are empty. This looks like
+// angle-bracket text lost during extraction; the tokens are left exactly as
+// found -- restore them from the upstream file before building.
+#include
+
+namespace simdjson {
+
+// Returns true when every instruction-set bit this implementation requires is
+// also present in the mask reported by internal::detect_supported_architectures().
+bool implementation::supported_by_runtime_system() const {
+  uint32_t required_instruction_sets = this->required_instruction_sets();
+  uint32_t supported_instruction_sets = internal::detect_supported_architectures();
+  return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets);
+}
+
+namespace internal {
+
+// Static array of known implementations. We're hoping these get baked into the executable
+// without requiring a static initializer.
+// Each getter below owns one function-local static instance and hands out its
+// address; a getter only exists when the matching SIMDJSON_IMPLEMENTATION_*
+// macro is non-zero.
+
+#if SIMDJSON_IMPLEMENTATION_ICELAKE
+static const icelake::implementation* get_icelake_singleton() {
+  static const icelake::implementation icelake_singleton{};
+  return &icelake_singleton;
+}
+#endif
+#if SIMDJSON_IMPLEMENTATION_HASWELL
+static const haswell::implementation* get_haswell_singleton() {
+  static const haswell::implementation haswell_singleton{};
+  return &haswell_singleton;
+}
+#endif
+#if SIMDJSON_IMPLEMENTATION_WESTMERE
+static const westmere::implementation* get_westmere_singleton() {
+  static const westmere::implementation westmere_singleton{};
+  return &westmere_singleton;
+}
+#endif // SIMDJSON_IMPLEMENTATION_WESTMERE
+#if SIMDJSON_IMPLEMENTATION_ARM64
+static const arm64::implementation* get_arm64_singleton() {
+  static const arm64::implementation arm64_singleton{};
+  return &arm64_singleton;
+}
+#endif // SIMDJSON_IMPLEMENTATION_ARM64
+#if SIMDJSON_IMPLEMENTATION_PPC64
+static const ppc64::implementation* get_ppc64_singleton() {
+  static const ppc64::implementation ppc64_singleton{};
+  return &ppc64_singleton;
+}
+#endif // SIMDJSON_IMPLEMENTATION_PPC64
+#if SIMDJSON_IMPLEMENTATION_FALLBACK
+static const fallback::implementation* get_fallback_singleton() {
+  static const fallback::implementation fallback_singleton{};
+  return &fallback_singleton;
+}
+#endif // SIMDJSON_IMPLEMENTATION_FALLBACK
+
+/**
+ * @private Detects best supported implementation on first use, and sets it
+ *
+ * Every virtual below first calls set_best(), which picks the concrete
+ * implementation and stores it in get_active_implementation(), and then
+ * forwards the call to that implementation.
+ */
+class detect_best_supported_implementation_on_first_use final : public implementation {
+public:
+  const std::string &name() const noexcept final { return set_best()->name(); }
+  const std::string &description() const noexcept final { return set_best()->description(); }
+  uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
+  simdjson_warn_unused error_code create_dom_parser_implementation(
+    size_t capacity,
+    size_t max_length,
+    std::unique_ptr& dst
+  ) const noexcept final {
+    return set_best()->create_dom_parser_implementation(capacity, max_length, dst);
+  }
+  simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final {
+    return set_best()->minify(buf, len, dst, dst_len);
+  }
+  simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override {
+    return set_best()->validate_utf8(buf, len);
+  }
+  // The detector itself declares no required instruction sets (third argument 0).
+  simdjson_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {}
+private:
+  const implementation *set_best() const noexcept;
+};
+
+// Returns the list of compiled-in implementations. The entries appear in the
+// order icelake, haswell, westmere, arm64, ppc64, fallback; detect_best_supported()
+// relies on that order being the priority order.
+static const std::initializer_list& get_available_implementation_pointers() {
+  static const std::initializer_list available_implementation_pointers {
+#if SIMDJSON_IMPLEMENTATION_ICELAKE
+    get_icelake_singleton(),
+#endif
+#if SIMDJSON_IMPLEMENTATION_HASWELL
+    get_haswell_singleton(),
+#endif
+#if SIMDJSON_IMPLEMENTATION_WESTMERE
+    get_westmere_singleton(),
+#endif
+#if SIMDJSON_IMPLEMENTATION_ARM64
+    get_arm64_singleton(),
+#endif
+#if SIMDJSON_IMPLEMENTATION_PPC64
+    get_ppc64_singleton(),
+#endif
+#if SIMDJSON_IMPLEMENTATION_FALLBACK
+    get_fallback_singleton(),
+#endif
+  }; // available_implementation_pointers
+  return available_implementation_pointers;
+}
+
+// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
+class unsupported_implementation final : public implementation {
+public:
+  simdjson_warn_unused error_code create_dom_parser_implementation(
+    size_t,
+    size_t,
+    std::unique_ptr&
+  ) const noexcept final {
+    return UNSUPPORTED_ARCHITECTURE;
+  }
+  simdjson_warn_unused error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override {
+    return UNSUPPORTED_ARCHITECTURE;
+  }
+  simdjson_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override {
+    return false; // Just refuse to validate. Given that we have a fallback implementation
+    // it seems unlikely that unsupported_implementation will ever be used. If it is used,
+    // then it will flag all strings as invalid. The alternative is to return an error_code
+    // from which the user has to figure out whether the string is valid UTF-8... which seems
+    // like a lot of work just to handle the very unlikely case that we have an unsupported
+    // implementation. And, when it does happen (that we have an unsupported implementation),
+    // what are the chances that the programmer has a fallback? Given that *we* provide the
+    // fallback, it implies that the programmer would need a fallback for our fallback.
+  }
+  unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {}
+};
+
+// Returns the address of the single function-local unsupported_implementation.
+const unsupported_implementation* get_unsupported_singleton() {
+  static const unsupported_implementation unsupported_singleton{};
+  return &unsupported_singleton;
+}
+
+// size()/begin()/end() expose the list returned by
+// get_available_implementation_pointers().
+size_t available_implementation_list::size() const noexcept {
+  return internal::get_available_implementation_pointers().size();
+}
+const implementation * const *available_implementation_list::begin() const noexcept {
+  return internal::get_available_implementation_pointers().begin();
+}
+const implementation * const *available_implementation_list::end() const noexcept {
+  return internal::get_available_implementation_pointers().end();
+}
+// Returns the first listed implementation whose required instruction sets are
+// all present in the detected mask, or the unsupported singleton if none match.
+const implementation *available_implementation_list::detect_best_supported() const noexcept {
+  // They are prelisted in priority order, so we just go down the list
+  uint32_t supported_instruction_sets = internal::detect_supported_architectures();
+  for (const implementation *impl : internal::get_available_implementation_pointers()) {
+    uint32_t required_instruction_sets = impl->required_instruction_sets();
+    if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
+  }
+  return get_unsupported_singleton(); // this should never happen?
+}
+
+// Chooses the implementation to use, stores it in get_active_implementation()
+// and returns it. If the SIMDJSON_FORCE_IMPLEMENTATION environment variable is
+// set, it is looked up by name in get_available_implementations(); an unknown
+// name selects the unsupported singleton rather than falling back to
+// detection. If the variable is unset, detect_best_supported() decides.
+const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept {
+  SIMDJSON_PUSH_DISABLE_WARNINGS
+  SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe
+  char *force_implementation_name = getenv("SIMDJSON_FORCE_IMPLEMENTATION");
+  SIMDJSON_POP_DISABLE_WARNINGS
+
+  if (force_implementation_name) {
+    auto force_implementation = get_available_implementations()[force_implementation_name];
+    if (force_implementation) {
+      return get_active_implementation() = force_implementation;
+    } else {
+      // Note: abort() and stderr usage within the library is forbidden.
+      return get_active_implementation() = get_unsupported_singleton();
+    }
+  }
+  return get_active_implementation() = get_available_implementations().detect_best_supported();
+}
+
+} // namespace internal
+
+// Returns the single function-local available_implementation_list.
+SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() {
+  static const internal::available_implementation_list available_implementations{};
+  return available_implementations;
+}
+
+// Returns the slot holding the implementation currently in use. It starts out
+// pointing at the detector singleton, whose set_best() overwrites the slot
+// with a concrete implementation the first time any of its methods is called.
+SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() {
+    static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton;
+    static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton};
+    return active_implementation;
+}
+
+// char-pointer convenience wrapper: forwards to the active implementation's
+// minify(), which takes uint8_t pointers (see the member declarations above).
+simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept {
+  return get_active_implementation()->minify(reinterpret_cast(buf), len, reinterpret_cast(dst), dst_len);
+}
+// Forwards to the active implementation's validate_utf8().
+simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
+  return get_active_implementation()->validate_utf8(buf, len);
+}
+// Looks up the implementation named by SIMDJSON_BUILTIN_IMPLEMENTATION once
+// (function-local static) and returns it. The lookup result is only checked
+// by assert(), so a failed lookup is not caught when assertions are disabled.
+const implementation * builtin_implementation() {
+  static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)];
+  assert(builtin_impl);
+  return builtin_impl;
+}
+
+
+} // namespace simdjson
+/* end file src/implementation.cpp */
+
+#if SIMDJSON_IMPLEMENTATION_ARM64
+/* begin file src/arm64/implementation.cpp */
+/* begin file include/simdjson/arm64/begin.h */
+// redefining SIMDJSON_IMPLEMENTATION to "arm64"
+// #define SIMDJSON_IMPLEMENTATION arm64
+/* end file include/simdjson/arm64/begin.h */
+
+namespace simdjson {
+namespace arm64 {
+
+// Allocates an arm64 dom_parser_implementation into dst without throwing and
+// applies the requested capacity and max depth.
+// Returns MEMALLOC if the allocation failed, the first error reported by
+// set_capacity()/set_max_depth() if either fails, otherwise SUCCESS.
+// NOTE(review): dst still owns the new parser when a set_* call fails.
+// NOTE(review): the template argument of std::unique_ptr is missing here and
+// looks stripped by extraction -- restore from upstream.
+simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
+  size_t capacity,
+  size_t max_depth,
+  std::unique_ptr& dst
+) const noexcept {
+  dst.reset( new (std::nothrow) dom_parser_implementation() );
+  if (!dst) { return MEMALLOC; }
+  if (auto err = dst->set_capacity(capacity))
+    return err;
+  if (auto err = dst->set_max_depth(max_depth))
+    return err;
+  return SUCCESS;
+}
+
+} // namespace arm64
+} // namespace simdjson
+
+/* begin file include/simdjson/arm64/end.h */
+/* end file include/simdjson/arm64/end.h */
+/* end file src/arm64/implementation.cpp */
+/* begin file src/arm64/dom_parser_implementation.cpp */
+/* begin file include/simdjson/arm64/begin.h */
+// redefining SIMDJSON_IMPLEMENTATION to "arm64"
+// #define SIMDJSON_IMPLEMENTATION arm64
+/* end file include/simdjson/arm64/begin.h */
+
+//
+// Stage 1
+//
+namespace simdjson {
+namespace arm64 {
+namespace {
+
+using namespace simd;
+
+// Two 64-bit masks produced by classify(): one flags whitespace bytes, the
+// other flags operator bytes. scalar() is the complement of their union.
+// NOTE(review): presumably one bit per byte of a 64-byte input block, given
+// the four chunks reduced by to_bitmask() in classify() -- confirm against
+// simd8x64. The simd8/simd8x64 template arguments throughout this section
+// are missing (likely stripped by extraction) and are left as found.
+struct json_character_block {
+  static simdjson_inline json_character_block classify(const simd::simd8x64& in);
+
+  simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; }
+  simdjson_inline uint64_t op() const noexcept { return _op; }
+  simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); }
+
+  uint64_t _whitespace;
+  uint64_t _op;
+};
+
+// Classifies each input byte with two 16-entry lookups: table1 is indexed by
+// the low nibble, table2 by the high nibble, and the two results are ANDed.
+// A result with any of bits 0x07 set is reported in the op mask; any of bits
+// 0x18 in the whitespace mask. Worked examples from the tables:
+//   ',' (0x2c): table1[0xc]=2,  table2[0x2]=18 -> 2  -> op
+//   ':' (0x3a): table1[0xa]=12, table2[0x3]=4  -> 4  -> op
+//   '{' (0x7b): table1[0xb]=1,  table2[0x7]=1  -> 1  -> op
+//   ' ' (0x20): table1[0x0]=16, table2[0x2]=18 -> 16 -> whitespace
+//   LF  (0x0a): table1[0xa]=12, table2[0x0]=8  -> 8  -> whitespace
+simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) {
+  // Functional programming causes trouble with Visual Studio.
+  // Keeping this version in comments since it is much nicer:
+  // auto v = in.map([&](simd8 chunk) {
+  //  auto nib_lo = chunk & 0xf;
+  //  auto nib_hi = chunk.shr<4>();
+  //  auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
+  //  auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
+  //  return shuf_lo & shuf_hi;
+  // });
+  const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
+  const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0);
+
+  simd8x64 v(
+    (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2),
+    (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2),
+    (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2),
+    (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2)
+  );
+
+
+  // We compute whitespace and op separately. If the code later only uses one or the
+  // other, given the fact that all functions are aggressively inlined, we can
+  // hope that useless computations will be omitted. This is namely the case when
+  // minifying (we only need whitespace). *However* if we only need spaces,
+  // it is likely that we will still compute 'v' above with two lookup_16: one
+  // could do it a bit cheaper. This is in contrast with the x64 implementations
+  // where we can, efficiently, do the white space and structural matching
+  // separately. One reason for this difference is that on ARM NEON, the table
+  // lookups either zero or leave unchanged the characters exceeding 0xF whereas
+  // on x64, the equivalent instruction (pshufb) automatically applies a mask,
+  // ignoring the 4 most significant bits. Thus the x64 implementation is
+  // optimized differently. This being said, if you use this code strictly
+  // just for minification (or just to identify the structural characters),
+  // there is a small untaken optimization opportunity here. We deliberately
+  // do not pick it up.
+
+  uint64_t op = simd8x64(
+    v.chunks[0].any_bits_set(0x7),
+    v.chunks[1].any_bits_set(0x7),
+    v.chunks[2].any_bits_set(0x7),
+    v.chunks[3].any_bits_set(0x7)
+  ).to_bitmask();
+
+  uint64_t whitespace = simd8x64(
+    v.chunks[0].any_bits_set(0x18),
+    v.chunks[1].any_bits_set(0x18),
+    v.chunks[2].any_bits_set(0x18),
+    v.chunks[3].any_bits_set(0x18)
+  ).to_bitmask();
+
+  // Aggregate order matches the member order: _whitespace first, then _op.
+  return { whitespace, op };
+}
+
+// Returns true when the largest byte of input.reduce_or() is below 0x80.
+// NOTE(review): presumably reduce_or() ORs the four chunks together, so this
+// holds exactly when no input byte has its top bit set -- confirm in simd8x64.
+simdjson_inline bool is_ascii(const simd8x64& input) {
+  simd8 bits = input.reduce_or();
+  return bits.max_val() < 0x80u;
+}
+
+simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) {
+  simd8 is_second_byte = prev1 >= uint8_t(0xc0u);
+  simd8 is_third_byte = prev2 >= uint8_t(0xe0u);
+  simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u);
+  // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well.
+  // This will work fine because we only have to report errors for cases with 0-1 lead bytes.
+  // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is
+  // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character.
+  // The error will be detected there.
+ return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); + return is_third_byte ^ is_fourth_byte; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + 
// 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + 
// + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. 
+ static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace arm64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_inline uint64_t 
string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. 
+// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> 
+simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace arm64 { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// + +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace arm64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
arm64 +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + // On ARM, we don't short-circuit this if there are no backslashes, because the branch gives us no + // benefit and therefore makes things worse. + // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return arm64::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code 
dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/arm64/end.h */ +/* end file include/simdjson/arm64/end.h */ +/* end file src/arm64/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* begin file src/fallback/implementation.cpp */ +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ +/* end file src/fallback/implementation.cpp */ +/* begin file src/fallback/dom_parser_implementation.cpp */ +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +// +// Stage 1 +// +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace fallback { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. 
+ * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. 
+ */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. 
+ return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage1 { + +class structural_scanner { +public: + +simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) + : buf{_parser.buf}, + next_structural_index{_parser.structural_indexes.get()}, + parser{_parser}, + len{static_cast(_parser.len)}, + partial{_partial} { +} + +simdjson_inline void add_structural() { + *next_structural_index = idx; + next_structural_index++; +} + +simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; +} + +simdjson_inline void validate_utf8_character() { + // Continuation + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { + // extra continuation + error = UTF8_ERROR; + idx++; + return; + } + + // 2-byte + if ((buf[idx] & 0x20) == 0) { + // missing continuation + if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 1100000_ 10______ + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } + idx += 2; + return; + } + + // 3-byte + if ((buf[idx] & 0x10) == 0) { + // missing continuation + if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11100000 100_____ ________ + if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } + // surrogates: U+D800-U+DFFF 11101101 101_____ + if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } + idx += 3; + return; + } + + // 4-byte + // missing continuation + if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || 
!is_continuation(buf[idx+3]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11110000 1000____ ________ ________ + if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } + // too large: > U+10FFFF: + // 11110100 (1001|101_)____ + // 1111(1___|011_|0101) 10______ + // also includes 5, 6, 7 and 8 byte characters: + // 11111___ + if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } + idx += 4; +} + +// Returns true if the string is unclosed. +simdjson_inline bool validate_string() { + idx++; // skip first quote + while (idx < len && buf[idx] != '"') { + if (buf[idx] == '\\') { + idx += 2; + } else if (simdjson_unlikely(buf[idx] & 0x80)) { + validate_utf8_character(); + } else { + if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } + if (idx >= len) { return true; } + return false; +} + +simdjson_inline bool is_whitespace_or_operator(uint8_t c) { + switch (c) { + case '{': case '}': case '[': case ']': case ',': case ':': + case ' ': case '\r': case '\n': case '\t': + return true; + default: + return false; + } +} + +// +// Parse the entire input in STEP_SIZE-byte chunks. +// +simdjson_inline error_code scan() { + bool unclosed_string = false; + for (;idx 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. 
+ parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } + } else if(unclosed_string) { error = UNCLOSED_STRING; } + return error; +} + +private: + const uint8_t *buf; + uint32_t *next_structural_index; + dom_parser_implementation &parser; + uint32_t len; + uint32_t idx{0}; + error_code error{SUCCESS}; + stage1_mode partial; +}; // structural_scanner + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { + this->buf = _buf; + this->len = _len; + stage1::structural_scanner scanner(*this, partial); + return scanner.scan(); +} + +// big table for the minifier +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 
0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < len) { + unsigned char c = buf[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + dst[pos] = c; + pos += meta[2] | quote; + + i += 1; + nonescape = uint8_t(~nonescape) | (meta[1]); + } + dst_len = pos; // we intentionally do not work with a reference + // for fear of aliasing + return quote ? 
UNCLOSED_STRING : SUCCESS; +} + +// credit: based on code from Google Fuchsia (Apache Licensed) +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 8 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v1; + memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + if (byte < 0x80) { + pos++; + continue; + } else if ((byte & 0xe0) == 0xc0) { + next_pos = pos + 2; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); + if (code_point < 0x80 || 0x7ff < code_point) { return false; } + } else if ((byte & 0xf0) == 0xe0) { + next_pos = pos + 3; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } + // range check + code_point = + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); + if (code_point <= 0xffff || 
0x10ffff < code_point) { return false; } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} + +} // namespace fallback +} // namespace simdjson + +// +// Stage 2 +// +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace fallback { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
fallback +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return fallback::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ +/* end file src/fallback/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* begin file src/icelake/implementation.cpp */ +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +namespace simdjson { +namespace icelake { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + 
return SUCCESS; +} + +} // namespace icelake +} // namespace simdjson + +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* end file include/simdjson/icelake/end.h */ + +/* end file src/icelake/implementation.cpp */ +/* begin file src/icelake/dom_parser_implementation.cpp */ +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + // ASCII white-space ('\r','\n','\t',' ') + simdjson_inline uint64_t whitespace() const noexcept; + // non-quote structural characters (comma, colon, braces, brackets) + simdjson_inline uint64_t op() const noexcept; + // neither a structural character nor a white-space, so letters, numbers and quotes + simdjson_inline uint64_t scalar() const noexcept; + + uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') + uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) +}; + +simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } +simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. 
+ const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). 
+ + const uint64_t whitespace = in.eq({ + _mm512_shuffle_epi8(whitespace_table, in.chunks[0]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm512_shuffle_epi8(op_table, in.chunks[0]) + }); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have 
____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +// defining SIMDJSON_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write +#define SIMDJSON_CUSTOM_BIT_INDEXER +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace icelake { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. 
+ */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_inline uint64_t 
string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. 
+// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> 
+simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace icelake { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +// We must not forget to undefine it now: +#undef SIMDJSON_CUSTOM_BIT_INDEXER + +/** + * We provide a custom version of bit_indexer::write using + * naked intrinsics. + * TODO: make this code more elegant. 
+ */ +// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'. +// as a workaround, we disable warnings within the following function. +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +namespace simdjson { namespace icelake { namespace { namespace stage1 { +simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) { return; } + + const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32( + 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130, + 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120, + 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110, + 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100 + )); + const __m512i start_index = _mm512_set1_epi32(idx); + + const auto count = count_ones(bits); + __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes)); + _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index)); + + if(count > 16) { + const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1)); + _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index)); + if(count > 32) { + const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2)); + _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index)); + if(count > 48) { + const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3)); + _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index)); + } + } + } + this->tail += count; +} +}}}} +SIMDJSON_POP_DISABLE_WARNINGS + +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace icelake { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
icelake +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return icelake::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return icelake::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { 
return error; } + return stage2(_doc); +} + +} // namespace icelake +} // namespace simdjson + +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* end file include/simdjson/icelake/end.h */ +/* end file src/icelake/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* begin file src/haswell/implementation.cpp */ +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +namespace simdjson { +namespace haswell { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson + +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ + +/* end file src/haswell/implementation.cpp */ +/* begin file src/haswell/dom_parser_implementation.cpp */ +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + // ASCII white-space ('\r','\n','\t',' ') + simdjson_inline uint64_t whitespace() const noexcept; + // non-quote structural characters (comma, colon, braces, brackets) + simdjson_inline uint64_t op() const noexcept; + // neither 
a structural character nor a white-space, so letters, numbers and quotes + simdjson_inline uint64_t scalar() const noexcept; + + uint64_t _whitespace; // ASCII white-space ('\r','\n','\t',' ') + uint64_t _op; // structural characters (comma, colon, braces, brackets but not quotes) +}; + +simdjson_inline uint64_t json_character_block::whitespace() const noexcept { return _whitespace; } +simdjson_inline uint64_t json_character_block::op() const noexcept { return _op; } +simdjson_inline uint64_t json_character_block::scalar() const noexcept { return ~(op() | whitespace()); } + +// This identifies structural characters (comma, colon, braces, brackets), +// and ASCII white-space ('\r','\n','\t',' '). +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + const auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. 
If later code only uses one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). + + const uint64_t whitespace = in.eq({ + _mm256_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm256_shuffle_epi8(whitespace_table, in.chunks[1]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm256_shuffle_epi8(op_table, in.chunks[0]), + _mm256_shuffle_epi8(op_table, in.chunks[1]) + }); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have 
____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace haswell { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_inline uint64_t 
string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. 
+// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> 
+simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace haswell { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace haswell { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
haswell +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return haswell::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { 
return error; } + return stage2(_doc); +} + +} // namespace haswell +} // namespace simdjson + +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ +/* end file src/haswell/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* begin file src/ppc64/implementation.cpp */ +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ +/* end file src/ppc64/implementation.cpp */ +/* begin file src/ppc64/dom_parser_implementation.cpp */ +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +// +// Stage 1 +// +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +simdjson_inline json_character_block 
json_character_block::classify(const simd::simd8x64& in) { + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + // careful: 0x80 is not ascii. + return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + 
TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const 
simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. 
+ static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace ppc64 { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_inline uint64_t 
string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. 
+//
+// Example:
+//
+// NOTE(review): the column alignment of the table below was lost when whitespace was
+// collapsed; the rows are kept as found. Restore the spacing before relying on exact
+// bit positions.
+//
+// text | \\\ | \\\"\\\" \\\" \\"\\" |
+// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will OR it into follows_escape
+// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape
+// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later
+// invert_mask | | cxxx c xx c| even_seq << 1
+// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit
+// escaped | x | x x x x x x x x |
+// desired | x | x x x x x x x x |
+// text | \\\ | \\\"\\\" \\\" \\"\\" |
+//
+simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) {
+  // If there was overflow, pretend the first character isn't a backslash
+  backslash &= ~prev_escaped;
+  // Every position directly after a remaining backslash, plus the bit carried in via prev_escaped.
+  uint64_t follows_escape = backslash << 1 | prev_escaped;
+
+  // Get sequences starting on even bits by clearing out the odd series using + (integer addition)
+  // even_bits selects bit positions 0, 2, 4, ... (0x55 == 0b01010101).
+  const uint64_t even_bits = 0x5555555555555555ULL;
+  // Backslashes sitting on odd bit positions that do not themselves follow an escape.
+  uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape;
+  uint64_t sequences_starting_on_even_bits;
+  // NOTE(review): add_overflow is defined elsewhere; presumably it writes the 64-bit sum through
+  // its third argument and returns the carry-out, which is saved in prev_escaped for the next
+  // call -- confirm against its definition.
+  prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits);
+  uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes.
+
+  // Mask every other backslashed character as an escaped character
+  // Flip the mask for sequences that start on even bits, to correct them
+  return (even_bits ^ invert_mask) & follows_escape;
+}
+
+//
+// Return a mask of all string characters plus end quotes.
+//
+// prev_escaped is overflow saying whether the next character is escaped.
+// prev_in_string is overflow saying whether we're still in a string.
+//
+// Backslash sequences outside of quotes will be detected in stage 2.
+// +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> 
+simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace ppc64 { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace ppc64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
ppc64 +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + // On PPC, we don't short-circuit this if there are no backslashes, because the branch gives us no + // benefit and therefore makes things worse. + // if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return ppc64::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code 
dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ +/* end file src/ppc64/dom_parser_implementation.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* begin file src/westmere/implementation.cpp */ +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +/* begin file include/simdjson/westmere/end.h */ +SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ +/* end file src/westmere/implementation.cpp */ +/* begin file src/westmere/dom_parser_implementation.cpp */ +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +// +// Stage 1 +// + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return 
_whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). 
+ + + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +/* begin file src/generic/stage1/utf8_lookup4_algorithm.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have 
____ in byte 1 . + const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. 
+ // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. + // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. 
+ simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? 
error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/utf8_lookup4_algorithm.h */ +/* begin file src/generic/stage1/json_structural_indexer.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +/* begin file src/generic/stage1/buf_block_reader.h */ +namespace simdjson { +namespace westmere { +namespace { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? 
'_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. 
+ std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/buf_block_reader.h */ +/* begin file src/generic/stage1/json_string_scanner.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_string_block(uint64_t backslash, uint64_t escaped, uint64_t quote, uint64_t in_string) : + _backslash(backslash), _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_inline uint64_t escaped() const { return _escaped; } + // Escape characters (backslashes that are not escaped--i.e. in \\, includes only the first \) + simdjson_inline uint64_t escape() const { return _backslash & ~_escaped; } + // Real (non-backslashed) quotes + simdjson_inline uint64_t quote() const { return _quote; } + // Start quotes of strings + simdjson_inline uint64_t string_start() const { return _quote & _in_string; } + // End quotes of strings + simdjson_inline uint64_t string_end() const { return _quote & ~_in_string; } + // Only characters inside the string (not including the quotes) + simdjson_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_inline uint64_t 
string_tail() const { return _in_string ^ _quote; } + + // backslash characters + uint64_t _backslash; + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-backslashed ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Intended to be defined by the implementation + simdjson_inline uint64_t find_escaped(uint64_t escape); + simdjson_inline uint64_t find_escaped_branchless(uint64_t escape); + + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; + // Whether the first character of the next iteration is escaped. + uint64_t prev_escaped = 0ULL; +}; + +// +// Finds escaped characters (characters following \). +// +// Handles runs of backslashes like \\\" and \\\\" correctly (yielding 0101 and 01010, respectively). +// +// Does this by: +// - Shift the escape mask to get potentially escaped characters (characters after backslashes). +// - Mask escaped sequences that start on *even* bits with 1010101010 (odd bits are escaped, even bits are not) +// - Mask escaped sequences that start on *odd* bits with 0101010101 (even bits are escaped, odd bits are not) +// +// To distinguish between escaped sequences starting on even/odd bits, it finds the start of all +// escape sequences, filters out the ones that start on even bits, and adds that to the mask of +// escape sequences. This causes the addition to clear out the sequences starting on odd bits (since +// the start bit causes a carry), and leaves even-bit sequences alone. 
+// +// Example: +// +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// escape | xxx | xx xxx xxx xx xx | Removed overflow backslash; will | it into follows_escape +// odd_starts | x | x x x | escape & ~even_bits & ~follows_escape +// even_seq | c| cxxx c xx c | c = carry bit -- will be masked out later +// invert_mask | | cxxx c xx c| even_seq << 1 +// follows_escape | xx | x xx xxx xxx xx xx | Includes overflow bit +// escaped | x | x x x x x x x x | +// desired | x | x x x x x x x x | +// text | \\\ | \\\"\\\" \\\" \\"\\" | +// +simdjson_inline uint64_t json_string_scanner::find_escaped_branchless(uint64_t backslash) { + // If there was overflow, pretend the first character isn't a backslash + backslash &= ~prev_escaped; + uint64_t follows_escape = backslash << 1 | prev_escaped; + + // Get sequences starting on even bits by clearing out the odd series using + + const uint64_t even_bits = 0x5555555555555555ULL; + uint64_t odd_sequence_starts = backslash & ~even_bits & ~follows_escape; + uint64_t sequences_starting_on_even_bits; + prev_escaped = add_overflow(odd_sequence_starts, backslash, &sequences_starting_on_even_bits); + uint64_t invert_mask = sequences_starting_on_even_bits << 1; // The mask we want to return is the *escaped* bits, not escapes. + + // Mask every other backslashed character as an escaped character + // Flip the mask for sequences that start on even bits, to correct them + return (even_bits ^ invert_mask) & follows_escape; +} + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. 
+// +simdjson_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = find_escaped(backslash); + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + // right shift of a signed value expected to be well-defined and standard + // compliant as of C++20, John Regher from Utah U. says this is fine code + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_string_block( + backslash, + escaped, + quote, + in_string + ); +} + +simdjson_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_string_scanner.h */ +/* begin file src/generic/stage1/json_scanner.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. 
We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. 
not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. 
+// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_scanner.h */ +/* begin file src/generic/stage1/json_minifier.h */ +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> 
+simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_minifier.h */ +/* begin file src/generic/stage1/find_next_document_index.h */ +namespace simdjson { +namespace westmere { +namespace { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). 
If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/find_next_document_index.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_CUSTOM_BIT_INDEXER, then it will provide its own + // version of the code. +#ifdef SIMDJSON_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if defined(SIMDJSON_PREFER_REVERSE_BITS) + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. 
+ * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. 
+ */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. 
This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. + if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) 
+ uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); + checker.check_next_input(in); + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? 
+ ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. If we enforce that rule, then we would get + * ][[ which is invalid. 
+ * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). 
+ // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. + return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/json_structural_indexer.h */ +/* begin file src/generic/stage1/utf8_validator.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. 
+ */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage1/utf8_validator.h */ + +// +// Stage 2 +// +/* begin file src/generic/stage2/stringparsing.h */ +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + return false; + } + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. 
+ uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + return false; + } + + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + return false; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. 
Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/stringparsing.h */ +/* begin file src/generic/stage2/tape_builder.h */ +/* begin file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/logger.h */ +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace westmere { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. 
+ * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. 
+ */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( 
visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto 
array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + } + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline 
void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // namespace stage2 +} // unnamed namespace +} // namespace 
westmere +} // namespace simdjson +/* end file src/generic/stage2/json_iterator.h */ +/* begin file src/generic/stage2/tape_writer.h */ +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. 
+ */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct number_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. */ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/tape_writer.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. 
*/ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. 
+ * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // class tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + 
tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst); + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. 
What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) 
noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. 
+} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file src/generic/stage2/tape_builder.h */ + +// +// Implementation-specific overrides +// + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +simdjson_inline uint64_t json_string_scanner::find_escaped(uint64_t backslash) { + if (!backslash) { uint64_t escaped = prev_escaped; prev_escaped = 0; return escaped; } + return find_escaped_branchless(backslash); +} + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst) const noexcept { + return westmere::stringparsing::parse_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if 
(error) { return error; } + return stage2(_doc); +} + +} // namespace westmere +} // namespace simdjson + +/* begin file include/simdjson/westmere/end.h */ +SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ +/* end file src/westmere/dom_parser_implementation.cpp */ +#endif + +SIMDJSON_POP_DISABLE_WARNINGS +/* end file src/simdjson.cpp */ diff --git a/kram-profile/CBA/simdjson.h b/kram-profile/CBA/simdjson.h new file mode 100644 index 00000000..4ad510d3 --- /dev/null +++ b/kram-profile/CBA/simdjson.h @@ -0,0 +1,31622 @@ +/* auto-generated on 2022-10-16 16:59:15 +0000. Do not edit! */ +/* begin file include/simdjson.h */ +#ifndef SIMDJSON_H +#define SIMDJSON_H + +/** + * @mainpage + * + * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). + * + * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. + + #include "simdjson.h" + + int main(void) { + // load from `twitter.json` file: + simdjson::dom::parser parser; + simdjson::dom::element tweets = parser.load("twitter.json"); + std::cout << tweets["search_metadata"]["count"] << " results." 
<< std::endl; + + // Parse and iterate through an array of objects + auto abstract_json = R"( [ + { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, + { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } + ] )"_padded; + + for (simdjson::dom::object obj : parser.parse(abstract_json)) { + for(const auto key_value : obj) { + cout << "key: " << key_value.key << " : "; + simdjson::dom::object innerobj = key_value.value; + cout << "a: " << double(innerobj["a"]) << ", "; + cout << "b: " << double(innerobj["b"]) << ", "; + cout << "c: " << int64_t(innerobj["c"]) << endl; + } + } + } + */ + +/* begin file include/simdjson/simdjson_version.h */ +// /include/simdjson/simdjson_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDJSON_SIMDJSON_VERSION_H +#define SIMDJSON_SIMDJSON_VERSION_H + +/** The version of simdjson being used (major.minor.revision) */ +#define SIMDJSON_VERSION 3.0.0 + +namespace simdjson { +enum { + /** + * The major version (MAJOR.minor.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MAJOR = 3, + /** + * The minor version (major.MINOR.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MINOR = 0, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 0 +}; +} // namespace simdjson + +#endif // SIMDJSON_SIMDJSON_VERSION_H +/* end file include/simdjson/simdjson_version.h */ +/* begin file include/simdjson/dom.h */ +#ifndef SIMDJSON_DOM_H +#define SIMDJSON_DOM_H + +/* begin file include/simdjson/base.h */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* begin file include/simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file include/simdjson/compiler_check.h */ +/* begin file include/simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* begin file include/simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +// https://en.wikipedia.org/wiki/C_alternative_tokens +// This header should have no effect, except maybe +// under Visual Studio. 
+#include +#endif + +#if defined(__x86_64__) || defined(_M_AMD64) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#define SIMDJSON_IS_PPC64 1 +#else +#define SIMDJSON_IS_32BITS 1 + +// We do not support 32-bit platforms, but it can be +// handy to identify them. +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) + +#ifdef SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ +compiling for a known 64-bit platform. All fast kernels \ +will be disabled and performance may be poor. Please \ +use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. 
+#ifdef SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + +#ifdef SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. 
+// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#ifdef NDEBUG + +#ifdef SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // NDEBUG + +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file include/simdjson/portability.h */ + +namespace simdjson { + +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; + +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. 
+ * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). + */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. 
+ #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if defined(SIMDJSON_CLANG_VISUAL_STUDIO) + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + + +#endif // MSC_VER + +#if defined(simdjson_inline) + // Prefer the user's 
definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if defined(SIMDJSON_VISUAL_STUDIO) + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. 
+ */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. + +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. 
+// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* begin file include/simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 6 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? 
nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if _MSC_VER +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++20 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202000L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( std::basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view +to_string_view( std::basic_string const & s ) +{ + return std::basic_string_view( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, 
size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 _MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// 
MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || 
nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! ( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? ) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? 
) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional 
includes: + +#include +#include +#include +#include +#include // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + +#if ! nssv_CONFIG_NO_EXCEPTIONS +# include +#endif + +#if nssv_CPP11_OR_GREATER +# include +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narrow +// - C26481, gsl::b.1 : don't use pointer arithmetic.
Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? 
result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +} // namespace detail + +template +< + class CharT, + class Traits = std::char_traits +> +class basic_string_view; + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & 
operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer 
data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr14 size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? 
npos + : to_pos( std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr14 size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? 
npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it 
) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< 
class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + 
basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< 
class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, 
Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ 
return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !os ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if 
nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + 
+}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! 
nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. + +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file include/simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. 
Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. 
+ +#if SIMDJSON_CPLUSPLUS17 +// if we have C++17, then fallthrough is a standard attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file include/simdjson/common_defs.h */ + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +// Public API +/* begin file include/simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernels return the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. 
+ */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. 
+ TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. 
+ */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. 
This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new successful result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only if + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. 
+ */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file include/simdjson/error.h */ +/* begin file include/simdjson/minify.h */ +#ifndef SIMDJSON_MINIFY_H +#define SIMDJSON_MINIFY_H + +/* begin file include/simdjson/padded_string.h */ +#ifndef SIMDJSON_PADDED_STRING_H +#define SIMDJSON_PADDED_STRING_H + +#include +#include +#include +#include + +namespace simdjson { + +class padded_string_view; + +/** + * String with extra allocation for ease of use with parser::parse() + * + * This is a move-only class, it cannot be copied. + */ +struct padded_string final { + + /** + * Create a new, empty padded string. + */ + explicit inline padded_string() noexcept; + /** + * Create a new padded string buffer. + * + * @param length the size of the string. + */ + explicit inline padded_string(size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param data the buffer to copy + * @param length the number of bytes to copy + */ + explicit inline padded_string(const char *data, size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param str_ the string to copy + */ + inline padded_string(const std::string & str_ ) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param sv_ the string to copy + */ + inline padded_string(std::string_view sv_) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string(padded_string &&o) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. 
+ */ + inline padded_string &operator=(padded_string &&o) noexcept; + inline void swap(padded_string &o) noexcept; + ~padded_string() noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t size() const noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t length() const noexcept; + + /** + * The string data. + **/ + const char *data() const noexcept; + const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} + + /** + * The string data. + **/ + char *data() noexcept; + + /** + * Create a std::string_view with the same content. + */ + operator std::string_view() const; + + /** + * Create a padded_string_view with the same content. + */ + operator padded_string_view() const noexcept; + + /** + * Load this padded string from a file. + * + * @return IO_ERROR on error. Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * + * @param path the path to the file. + **/ + inline static simdjson_result load(std::string_view path) noexcept; + +private: + padded_string &operator=(const padded_string &o) = delete; + padded_string(const padded_string &o) = delete; + + size_t viable_size{0}; + char *data_ptr{nullptr}; + +}; // padded_string + +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +// This is deliberately outside of simdjson so that people get it without having to use the namespace +inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { + return simdjson::padded_string(str, len); +} + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). +inline char *allocate_padded_buffer(size_t length) noexcept; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_H +/* end file include/simdjson/padded_string.h */ +#include +#include +#include + +namespace simdjson { + + + +/** + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * This function is much faster than parsing a JSON string and then writing a minified version of it. + * However, it does not validate the input. It will merely return an error in simple cases (e.g., if + * there is a string that was never terminated). + * + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. 
+ */ +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; + +} // namespace simdjson + +#endif // SIMDJSON_MINIFY_H +/* end file include/simdjson/minify.h */ +/* begin file include/simdjson/padded_string_view.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_H +#define SIMDJSON_PADDED_STRING_VIEW_H + + +#include +#include +#include +#include + +namespace simdjson { + +/** + * User-provided string that promises it has extra padded bytes at the end for use with parser::parse(). + */ +class padded_string_view : public std::string_view { +private: + size_t _capacity; + +public: + /** Create an empty padded_string_view. */ + inline padded_string_view() noexcept = default; + + /** + * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param len The length of the string (not including padding). + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; + /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ + explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; + + /** + * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * The capacity of the string will be used to determine its padding. + * + * @param s The string. + */ + explicit inline padded_string_view(const std::string &s) noexcept; + + /** + * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; + + /** The number of allocated bytes. 
*/ + inline size_t capacity() const noexcept; + + /** The amount of padding on the string (capacity() - length()) */ + inline size_t padding() const noexcept; + +}; // padded_string_view + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string_view. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_H +/* end file include/simdjson/padded_string_view.h */ +/* begin file include/simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* begin file include/simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. 
+ return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. + * @param len The length of the json document. + * @param streaming Whether this is being called by parser::parse_many. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser. + * + * Called after stage1(). + * + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, or SUCCESS if there was no error. 
+ */ + simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 2 of the document parser for parser::parse_many. + * + * Guaranteed only to be called after stage1(). + * Overridden by each implementation. + * + * @param doc The document to output to. + * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed. + */ + simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0; + + /** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + * + * Overridden by each implementation. + * + * @param src pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote. + * @param dst pointer to a destination buffer, it must point a region in memory of sufficient size. + * @return end of the of the written region (exclusive) or nullptr in case of error. + */ + simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept = 0; + + /** + * Change the capacity of this parser. + * + * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB) + * and an CAPACITY error is returned if it is attempted. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. + * + * Generally used for reallocation. + * + * @param max_depth The new max_depth. 
+ * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors. 
+ simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file include/simdjson/internal/dom_parser_implementation.h */ +/* begin file include/simdjson/internal/isadetection.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_ISADETECTION_H +#define SIMDJSON_INTERNAL_ISADETECTION_H + +#include +#include +#if defined(_MSC_VER) +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif + +namespace simdjson { +namespace internal { + + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000 +}; + +#if defined(__PPC64__) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; +} + +#elif defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64 + +#if defined(__ARM_NEON) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::NEON; +} + +#else // ARM without NEON + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + +#endif + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + + +namespace { +// Can be found on Intel ISA Reference for CPUID +constexpr uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512ifma_bit = 1 << 21; ///< @private bit 21 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vl_bit = 1U << 31; ///< @private bit 31 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 +constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 +constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 +} + + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuid(cpu_info, *eax); + *eax = cpu_info[0]; + *ebx = cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + 
__get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif +} + +static inline uint32_t detect_supported_architectures() { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |= instruction_set::BMI1; + } + + if (ebx & cpuid_bmi2_bit) { + host_isa |= instruction_set::BMI2; + } + + if (ebx & cpuid_avx512f_bit) { + host_isa |= instruction_set::AVX512F; + } + + if (ebx & cpuid_avx512dq_bit) { + host_isa |= instruction_set::AVX512DQ; + } + + if (ebx & cpuid_avx512ifma_bit) { + host_isa |= instruction_set::AVX512IFMA; + } + + if (ebx & cpuid_avx512pf_bit) { + host_isa |= instruction_set::AVX512PF; + } + + if (ebx & cpuid_avx512er_bit) { + host_isa |= instruction_set::AVX512ER; + } + + if (ebx & cpuid_avx512cd_bit) { + host_isa |= instruction_set::AVX512CD; + } + + if (ebx & cpuid_avx512bw_bit) { + host_isa |= instruction_set::AVX512BW; + } + + if (ebx & cpuid_avx512vl_bit) { + host_isa |= instruction_set::AVX512VL; + } + + if (ecx & cpuid_avx512vbmi2_bit) { + host_isa |= instruction_set::AVX512VBMI2; + } + + // EBX for EAX=0x1 + eax = 0x1; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_sse42_bit) { + host_isa |= instruction_set::SSE42; + } + + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= instruction_set::PCLMULQDQ; + } + + return host_isa; +} +#else // fallback + + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + + +#endif // end SIMD extension detection code + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ISADETECTION_H +/* end file include/simdjson/internal/isadetection.h */ +#include +#include +#include + +namespace simdjson { + +/** + 
* Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param s the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +namespace dom { + class document; +} // namespace dom + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual const std::string &name() const { return _name; } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". 
+ */ + virtual const std::string &description() const { return _description; } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime). + */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; }; + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. 
+ */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; + +private: + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. + */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. 
+ * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!imp->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. 
+ */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file include/simdjson/implementation.h */ + +// Inline functions +/* begin file include/simdjson/error-inl.h */ +#ifndef SIMDJSON_INLINE_ERROR_H +#define SIMDJSON_INLINE_ERROR_H + +#include +#include +#include + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. 
+ return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base inline implementation +// + +template +simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { + return 
std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair(std::forward(value), error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward(value), SUCCESS) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result inline implementation +/// + +template +simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { + std::forward>(*this).tie(value, error); +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { + return std::forward>(*this).get(value); +} + +template +simdjson_inline error_code simdjson_result::error() const noexcept { + return internal::simdjson_result_base::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result::value() & noexcept(false) { + return internal::simdjson_result_base::value(); +} + +template +simdjson_inline T&& simdjson_result::value() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline simdjson_result::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { + return internal::simdjson_result_base::value_unsafe(); +} + +template +simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { + return std::forward>(*this).value_unsafe(); +} + +template +simdjson_inline 
simdjson_result::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base(std::forward(value), error) {} +template +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +template +simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +template +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_ERROR_H +/* end file include/simdjson/error-inl.h */ +/* begin file include/simdjson/padded_string-inl.h */ +#ifndef SIMDJSON_INLINE_PADDED_STRING_H +#define SIMDJSON_INLINE_PADDED_STRING_H + + +#include +#include +#include +#include + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). +inline char *allocate_padded_buffer(size_t length) noexcept { + const size_t totalpaddedlength = length + SIMDJSON_PADDING; + if(totalpaddedlength(1UL<<20)) { + return nullptr; + } +#endif + + char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; + if (padded_buffer == nullptr) { + return nullptr; + } + // We write zeroes in the padded region to avoid having uninitized + // garbage. If nothing else, garbage getting read might trigger a + // warning in a memory checking. 
+ std::memset(padded_buffer + length, 0, totalpaddedlength - length); + return padded_buffer; +} // allocate_padded_buffer() + +} // namespace internal + + +inline padded_string::padded_string() noexcept = default; +inline padded_string::padded_string(size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { +} +inline padded_string::padded_string(const char *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, data, length); + } +} +// note: do not pass std::string arguments by value +inline padded_string::padded_string(const std::string & str_ ) noexcept + : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { + if (data_ptr != nullptr) { + std::memcpy(data_ptr, str_.data(), str_.size()); + } +} +// note: do pass std::string_view arguments by value +inline padded_string::padded_string(std::string_view sv_) noexcept + : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { + if(simdjson_unlikely(!data_ptr)) { + //allocation failed or zero size + viable_size=0; + return; + } + if (sv_.size()) { + std::memcpy(data_ptr, sv_.data(), sv_.size()); + } +} +inline padded_string::padded_string(padded_string &&o) noexcept + : viable_size(o.viable_size), data_ptr(o.data_ptr) { + o.data_ptr = nullptr; // we take ownership +} + +inline padded_string &padded_string::operator=(padded_string &&o) noexcept { + delete[] data_ptr; + data_ptr = o.data_ptr; + viable_size = o.viable_size; + o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; + return *this; +} + +inline void padded_string::swap(padded_string &o) noexcept { + size_t tmp_viable_size = viable_size; + char *tmp_data_ptr = data_ptr; + viable_size = o.viable_size; + data_ptr = o.data_ptr; + o.data_ptr = tmp_data_ptr; + o.viable_size = tmp_viable_size; +} + +inline 
padded_string::~padded_string() noexcept { + delete[] data_ptr; +} + +inline size_t padded_string::size() const noexcept { return viable_size; } + +inline size_t padded_string::length() const noexcept { return viable_size; } + +inline const char *padded_string::data() const noexcept { return data_ptr; } + +inline char *padded_string::data() noexcept { return data_ptr; } + +inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } + +inline padded_string::operator padded_string_view() const noexcept { + return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); +} + +inline simdjson_result padded_string::load(std::string_view filename) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(filename.data(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + __int64 llen = _ftelli64(fp); + if(llen == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long llen = std::ftell(fp); + if((llen < 0) || (llen == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Allocate the padded_string + size_t len = static_cast(llen); + padded_string s(len); + if (s.data() == nullptr) { + std::fclose(fp); + return MEMALLOC; + } + + // Read the padded_string + std::rewind(fp); + size_t bytes_read = std::fread(s.data(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != len) { + return IO_ERROR; + } + + return s; +} + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_PADDED_STRING_H +/* end file 
include/simdjson/padded_string-inl.h */ +/* begin file include/simdjson/padded_string_view-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H +#define SIMDJSON_PADDED_STRING_VIEW_INL_H + + +#include +#include +#include +#include + +namespace simdjson { + +inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept + : std::string_view(s, len), _capacity(capacity) +{ +} + +inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} + +inline padded_string_view::padded_string_view(const std::string &s) noexcept + : std::string_view(s), _capacity(s.capacity()) +{ +} + +inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept + : std::string_view(s), _capacity(capacity) +{ +} + +inline size_t padded_string_view::capacity() const noexcept { return _capacity; } + +inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_INL_H +/* end file include/simdjson/padded_string_view-inl.h */ + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_BASE_H +/* end file include/simdjson/base.h */ + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +/* begin file include/simdjson/dom/array.h */ +#ifndef SIMDJSON_DOM_ARRAY_H +#define SIMDJSON_DOM_ARRAY_H + +/* begin file include/simdjson/internal/tape_ref.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_REF_H +#define SIMDJSON_INTERNAL_TAPE_REF_H + +/* begin file include/simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape. 
+ */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file include/simdjson/internal/tape_type.h */ + +namespace simdjson { + +namespace dom { + class document; +} + +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + +/** + * A reference to an element on the tape. Internal only. + */ +class tape_ref { +public: + simdjson_inline tape_ref() noexcept; + simdjson_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; + inline size_t after_element() const noexcept; + simdjson_inline tape_type tape_ref_type() const noexcept; + simdjson_inline uint64_t tape_value() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_false() const noexcept; + simdjson_inline bool is_true() const noexcept; + simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. + simdjson_inline uint32_t matching_brace_index() const noexcept; + simdjson_inline uint32_t scope_count() const noexcept; + template + simdjson_inline T next_tape_value() const noexcept; + simdjson_inline uint32_t get_string_length() const noexcept; + simdjson_inline const char * get_c_str() const noexcept; + inline std::string_view get_string_view() const noexcept; + simdjson_inline bool is_document_root() const noexcept; + simdjson_inline bool usable() const noexcept; + + /** The document this element references. 
*/ + const dom::document *doc; + + /** The index of this element on `doc.tape[]` */ + size_t json_index; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_REF_H +/* end file include/simdjson/internal/tape_ref.h */ + +namespace simdjson { + +namespace internal { +template +class string_builder; +} +namespace dom { + +class document; +class element; + +/** + * JSON array. + */ +class array { +public: + /** Create a new, invalid array */ + simdjson_inline array() noexcept; + + class iterator { + public: + using value_type = element; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual value + */ + inline value_type operator*() const noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator& operator++() noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class array; + }; + + /** + * Return the first array element. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last array element. + * + * Part of the std::iterable interface. 
+ */ + inline iterator end() const noexcept; + /** + * Get the size of the array (number of immediate children). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). + * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. + **/ + inline size_t number_of_slots() const noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); + * a.at_pointer("/0/foo/a/1") == 20 + * a.at_pointer("0")["foo"]["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity and + * is equivalent to the following: + * + * size_t i=0; + * for (auto element : *this) { + * if (i == index) { return element; } + * i++; + * } + * return INDEX_OUT_OF_BOUNDS; + * + * Avoid calling the at() function repeatedly. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + +private: + simdjson_inline array(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + + +} // namespace dom + +/** The result of a JSON conversion that may fail. */ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at(size_t index) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::array::iterator begin() const noexcept(false); + inline dom::array::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_ARRAY_H +/* end file include/simdjson/dom/array.h */ +/* begin file include/simdjson/dom/document_stream.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_H +#define SIMDJSON_DOCUMENT_STREAM_H + +/* begin file include/simdjson/dom/parser.h */ +#ifndef SIMDJSON_DOM_PARSER_H +#define SIMDJSON_DOM_PARSER_H + +/* begin file include/simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +#include +#include + 
+namespace simdjson { +namespace dom { + +class element; + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. + * + * The memory allocation is strict: you + * can use this function to increase + * or lower the amount of allocated memory. + * Passing zero clears the memory. + */ + error_code allocate(size_t len) noexcept; + /** @private Capacity in bytes, in terms + * of how many bytes of input JSON we can + * support. 
+ */ + size_t capacity() const noexcept; + + +private: + size_t allocated_capacity{0}; + friend class parser; +}; // class document + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file include/simdjson/dom/document.h */ +#include +#include +#include + +namespace simdjson { + +namespace dom { + +class document_stream; +class element; + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +/** + * A persistent document parser. + * + * The parser is designed to be reused, holding the internal buffers necessary to do parsing, + * as well as memory for a single document. The parsed document is overwritten on each parse. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + * + * @note Moving a parser instance may invalidate "dom::element" instances. If you need to + * preserve both the "dom::element" instances and the parser, consider wrapping the parser + * instance in a std::unique_ptr instance: + * + * std::unique_ptr parser(new dom::parser{}); + * auto error = parser->load(f).get(root); + * + * You can then move std::unique_ptr safely. + * + * @note This is not thread safe: one parser cannot produce two documents at the same time! 
+ */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + * + * @param max_capacity The maximum document length the parser can automatically handle. The parser + * will allocate more capacity on an as needed basis (when it sees documents too big to handle) + * up to this amount. The parser still starts with zero capacity no matter what this number is: + * to allocate an initial capacity, call allocate() after constructing the parser. + * Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process). + */ + simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. + */ + simdjson_inline parser(parser &&other) noexcept; + parser(const parser &) = delete; ///< @private Disallow copying + /** + * Take another parser's buffers and state. + * + * @param other The parser to take. Its capacity is zeroed. + */ + simdjson_inline parser &operator=(parser &&other) noexcept; + parser &operator=(const parser &) = delete; ///< @private Disallow copying + + /** Deallocate the JSON parser. */ + ~parser()=default; + + /** + * Load a JSON document from a file and return a reference to it. + * + * dom::parser parser; + * const element doc = parser.load("jsonexamples/twitter.json"); + * + * The function is eager: the file's content is loaded in memory inside the parser instance + * and immediately parsed. The file can be deleted after the `parser.load` call. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. 
You may store + * the parser instance without moving it by wrapping it inside a `unique_ptr` instance like + * so: `std::unique_ptr parser(new dom::parser{});`. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than the file length, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param path The path to load. + * @return The document, or an error: + * - IO_ERROR if there was an error opening or reading the file. + * Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load(const std::string &path) & noexcept; + inline simdjson_result load(const std::string &path) && = delete ; + /** + * Parse a JSON document and return a temporary reference to it. + * + * dom::parser parser; + * element doc_root = parser.parse(buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse(buf, len)` call has completed. + * + * ### IMPORTANT: Document Lifetime + * + * The JSON document still lives in the parser: this is the most efficient way to parse JSON + * documents because it reuses the same buffers, but you *must* use the document before you + * destroy the parser or call parse() again. + * + * Moving the parser instance is safe, but it invalidates the element instances. You may store + * the parser instance without moving it by wrapping it inside a `unique_ptr` instance like + * so: `std::unique_ptr parser(new dom::parser{});`. 
+ * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding, + * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(json, json_len); + * + * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)), + * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end. + * The benefit of setting realloc_if_needed to false is that you avoid a temporary + * memory allocation and a copy. + * + * The padded bytes may be read. It is not important how you initialize + * these bytes though we recommend a sensible default like null character values or spaces. + * For example, the following low-level code is safe: + * + * const char *json = R"({"key":"value"})"; + * const size_t json_len = std::strlen(json); + * std::unique_ptr padded_json_copy{new char[json_len + SIMDJSON_PADDING]}; + * std::memcpy(padded_json_copy.get(), json, json_len); + * std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING); + * simdjson::dom::parser parser; + * simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false); + * + * ### Parser Capacity + * + * If the parser's current capacity is less than len, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. 
+ * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const std::string &s) & noexcept; + simdjson_inline simdjson_result parse(const std::string &s) && =delete; + /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse(const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse(const padded_string &s) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_inline simdjson_result parse(const char *buf) noexcept = delete; + + /** + * Parse a JSON document into a provided document instance and return a temporary reference to it. + * It is similar to the function `parse` except that instead of parsing into the internal + * `document` instance associated with the parser, it allows the user to provide a document + * instance. 
+ * + * dom::parser parser; + * dom::document doc; + * element doc_root = parser.parse_into_document(doc, buf, len); + * + * The function eagerly parses the input: the input can be modified and discarded after + * the `parser.parse_into_document(doc, buf, len)` call has completed. + * + * ### IMPORTANT: Document Lifetime + * + * After the call to parse_into_document, the parser is no longer needed. + * + * The JSON document lives in the document instance: you must keep the document + * instance alive while you navigate through it (i.e., use the returned value from + * parse_into_document). You are encouraged to reuse the document instance + * many times with new data to avoid reallocations: + * + * dom::document doc; + * element doc_root1 = parser.parse_into_document(doc, buf1, len); + * //... doc_root1 is a pointer inside doc + * element doc_root2 = parser.parse_into_document(doc, buf1, len); + * //... doc_root2 is a pointer inside doc + * // at this point doc_root1 is no longer safe + * + * Moving the document instance is safe, but it invalidates the element instances. After + * moving a document, you can recover safe access to the document root with its `root()` method. + * + * @param doc The document instance where the parsed data will be stored (on success). + * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless + * realloc_if_needed is true. + * @param len The length of the JSON. + * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding. + * @return An element pointing at the root of the document, or an error: + * - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity, + * and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and len > max_capacity. + * - other json errors if parsing fails. 
You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept; + inline simdjson_result parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const std::string &s) && =delete; + /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */ + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) & noexcept; + simdjson_inline simdjson_result parse_into_document(document& doc, const padded_string &s) && =delete; + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_inline simdjson_result parse_into_document(document& doc, const char *buf) noexcept = delete; + + /** + * Load a file containing many JSON documents. + * + * dom::parser parser; + * for (const element doc : parser.load_many(path)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)` + * function has returned. The memory is held by the `parser` instance. 
+ * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document may have been parsed when the `parser.load_many(path)` function + * returned. + * + * ### Format + * + * The file must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error. + * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. 
+ * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param path File name pointing at the concatenated JSON to parse. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet + * spot in our tests. + * If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE + * (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - IO_ERROR if there was an error opening or reading the file. + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails. + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + + /** + * Parse a buffer containing many JSON documents. + * + * dom::parser parser; + * for (element doc : parser.parse_many(buf, len)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document may have been parsed when the `parser.parse_many(buf, len)` function + * returned. 
+ * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. In particular, the following is unsafe and will not compile: + * + * auto docs = parser.parse_many("[\"temporary data\"]"_padded); + * // here the string "[\"temporary data\"]" may no longer exist in memory + * // the parser instance may not have even accessed the input yet + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The following is safe: + * + * auto json = "[\"temporary data\"]"_padded; + * auto docs = parser.parse_many(json); + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error.
+ * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. 
You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * @private deprecated because it returns bool instead of error_code, which is our standard for + * failures. Use allocate() instead. 
+ * + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return true if successful, false if allocation failed. + */ + [[deprecated("Use allocate() instead.")]] + simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount. + * + * @return Maximum capacity, in bytes. + */ + simdjson_inline size_t max_capacity() const noexcept; + + /** + * The maximum level of nested objects and arrays supported by this parser. + * + * @return Maximum depth, in nesting levels. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Set max_capacity. This is the largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount as documents are passed + * to it. + * + * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, + * if you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, + * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. + * + * This call will not allocate or deallocate, even if capacity is currently above max_capacity. + * + * @param max_capacity The new maximum capacity, in bytes. + */ + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + +#ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default.
Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; +#endif + /** @private Use the new DOM API instead */ + class Iterator; + /** @private Use simdjson_error instead */ + using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; + + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + + /** @private Use `if (parser.parse(...).error())` instead */ + bool valid{false}; + /** @private Use `parser.parse(...).error()` instead */ + error_code error{UNINITIALIZED}; + + /** @private Use `parser.parse(...).value()` instead */ + document doc{}; + + /** @private returns true if the document parsed was valid */ + [[deprecated("Use the result of parser.parse() instead")]] + inline bool is_valid() const noexcept; + + /** + * @private return an error code corresponding to the last parsing attempt, see + * simdjson.h will return UNINITIALIZED if no parsing was attempted + */ + [[deprecated("Use the result of parser.parse() instead")]] + inline int get_error_code() const noexcept; + + /** @private return the string equivalent of "get_error_code" */ + [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] + inline std::string get_error_message() const noexcept; + + /** @private */ + [[deprecated("Use cout << on the result of parser.parse() instead")]] + inline bool print_json(std::ostream &os) const noexcept; + + /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ + inline bool dump_raw_tape(std::ostream &os) const noexcept; + + +private: + /** + * The maximum document length this parser will automatically support. + * + * The parser will not be automatically allocated above this amount. + */ + size_t _max_capacity; + + /** + * The loaded buffer (reused each time load() is called) + */ + std::unique_ptr loaded_bytes; + + /** Capacity of loaded_bytes buffer. 
*/ + size_t _loaded_bytes_capacity{0}; + + // all nodes are stored on the doc.tape using a 64-bit word. + // + // strings, double and ints are stored as + // a 64-bit word with a pointer to the actual value + // + // + // + // for objects or arrays, store [ or { at the beginning and } and ] at the + // end. For the openings ([ or {), we annotate them with a reference to the + // location on the doc.tape of the end, and for then closings (} and ]), we + // annotate them with a reference to the location of the opening + // + // + + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * internal document. + */ + inline error_code ensure_capacity(size_t desired_capacity) noexcept; + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * provided document. + */ + inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; + + /** Read the file into loaded_bytes */ + inline simdjson_result read_file(const std::string &path) noexcept; + + friend class parser::Iterator; + friend class document_stream; + + +}; // class parser + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSER_H +/* end file include/simdjson/dom/parser.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace dom { + + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. 
+ **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + dom::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; +}; +#endif + +/** + * A forward-only stream of documents. + * + * Produced by parser::parse_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * error = parser.parse_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.parse_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + /** + * An iterator through a forward-only stream of documents. + */ + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + /** + * @private + * + * Gives a view of the current document. 
+ * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * std::string_view v = i->source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline std::string_view source() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + friend class document_stream; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. + * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + dom::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. 
+ */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** + * Pass the next batch through stage 1 and return when finished. + * When threads are enabled, this may wait for the stage 1 thread to finish. + */ + inline void load_batch() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; + + dom::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; +#ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. 
Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + friend struct stage1_worker; + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. + */ + dom::parser stage1_thread_parser{}; +#endif // SIMDJSON_THREADS_ENABLED + + friend class dom::parser; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; + +}; // class document_stream + +} // namespace dom + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private + +#if SIMDJSON_EXCEPTIONS + simdjson_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_inline dom::document_stream::iterator end() noexcept(false); +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator begin() noexcept; + [[deprecated("parse_many() and load_many() may return errors. 
Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator end() noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS +}; // struct simdjson_result + +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_STREAM_H +/* end file include/simdjson/dom/document_stream.h */ +/* begin file include/simdjson/dom/element.h */ +#ifndef SIMDJSON_DOM_ELEMENT_H +#define SIMDJSON_DOM_ELEMENT_H + +#include + +namespace simdjson { +namespace internal { +template +class string_builder; +} +namespace dom { +class array; +class document; +class object; + +/** + * The actual concrete type of a JSON element + * This is the type it is most easily cast to with get<>. + */ +enum class element_type { + ARRAY = '[', ///< dom::array + OBJECT = '{', ///< dom::object + INT64 = 'l', ///< int64_t + UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t + DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. + STRING = '"', ///< std::string_view + BOOL = 't', ///< bool + NULL_VALUE = 'n' ///< null +}; + +/** + * A JSON element. + * + * References an element in a JSON document, representing a JSON null, boolean, string, number, + * array or object. + */ +class element { +public: + /** Create a new, invalid element. */ + simdjson_inline element() noexcept; + + /** The type of this element. */ + simdjson_inline element_type type() const noexcept; + + /** + * Cast this element to an array. + * + * @returns An object that can be used to iterate the array, or: + * INCORRECT_TYPE if the JSON element is not an array. + */ + inline simdjson_result get_array() const noexcept; + /** + * Cast this element to an object. + * + * @returns An object that can be used to look up or iterate the object's fields, or: + * INCORRECT_TYPE if the JSON element is not an object. 
+ */ + inline simdjson_result get_object() const noexcept; + /** + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will + * be invalidated the next time it parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_c_str() const noexcept; + /** + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next time it + * parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string() const noexcept; + /** + * Cast this element to a signed integer. + * + * @returns A signed 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it does not fit in a signed 64-bit integer. + */ + inline simdjson_result get_int64() const noexcept; + /** + * Cast this element to an unsigned integer. + * + * @returns An unsigned 64-bit integer.
+ * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is negative or too large. + */ + inline simdjson_result get_uint64() const noexcept; + /** + * Cast this element to a double floating-point. + * + * @returns A double value. + * Returns INCORRECT_TYPE if the JSON element is not a number. + */ + inline simdjson_result get_double() const noexcept; + /** + * Cast this element to a bool. + * + * @returns A bool value. + * Returns INCORRECT_TYPE if the JSON element is not a boolean. + */ + inline simdjson_result get_bool() const noexcept; + + /** + * Whether this element is a json array. + * + * Equivalent to is<dom::array>(). + */ + inline bool is_array() const noexcept; + /** + * Whether this element is a json object. + * + * Equivalent to is<dom::object>(). + */ + inline bool is_object() const noexcept; + /** + * Whether this element is a json string. + * + * Equivalent to is<std::string_view>() or is<const char *>(). + */ + inline bool is_string() const noexcept; + /** + * Whether this element is a json number that fits in a signed 64-bit integer. + * + * Equivalent to is<int64_t>(). + */ + inline bool is_int64() const noexcept; + /** + * Whether this element is a json number that fits in an unsigned 64-bit integer. + * + * Equivalent to is<uint64_t>(). + */ + inline bool is_uint64() const noexcept; + /** + * Whether this element is a json number that fits in a double. + * + * Equivalent to is<double>(). + */ + inline bool is_double() const noexcept; + + /** + * Whether this element is a json number. + * + * Both integers and floating points will return true. + */ + inline bool is_number() const noexcept; + + /** + * Whether this element is a json `true` or `false`. + * + * Equivalent to is<bool>(). + */ + inline bool is_bool() const noexcept; + /** + * Whether this element is a json `null`. + */ + inline bool is_null() const noexcept; + + /** + * Tell whether the value can be cast to provided type (T).
+ * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + */ + template + simdjson_inline bool is() const noexcept; + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array() or get_string() instead. + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @returns The value cast to the given type, or: + * INCORRECT_TYPE if the value cannot be cast to the given type. + */ + + template + inline simdjson_result get() const noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the value. May not be set if there is an error. + * + * @returns The error that occurred, or SUCCESS if there was no error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + /** + * Get the value as the provided type (T), setting error if it's not the given type. 
+ * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the given type. value is undefined if there is an error. + * @param error The variable to store the error. error is set to SUCCESS if there is no error. + */ + template + inline void tie(T &value, error_code &error) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Read this element as a boolean. + * + * @return The boolean value + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. + */ + inline operator bool() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline explicit operator const char*() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline operator std::string_view() const noexcept(false); + + /** + * Read this element as an unsigned integer. + * + * @return The integer value.
+ * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator uint64_t() const noexcept(false); + /** + * Read this element as a signed integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits + */ + inline operator int64_t() const noexcept(false); + /** + * Read this element as a double. + * + * @return The double value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator double() const noexcept(false); + /** + * Read this element as a JSON array. + * + * @return The JSON array. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline operator array() const noexcept(false); + /** + * Read this element as a JSON object (key/value pairs). + * + * @return The JSON object. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object + */ + inline operator object() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The beginning of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator begin() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The end of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the value associated with the given key.
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at_pointer("/foo/a/1") == 20 + * doc.at_pointer("/foo")["a"].at(1) == 20 + * doc.at_pointer("")["foo"]["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * + * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard + * and allowed the following : + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at("foo/a/1") == 20 + * + * Though it is intuitive, it is not compliant with RFC 6901 + * https://tools.ietf.org/html/rfc6901 + * + * For standard compliance, use the at_pointer function instead. 
+ * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + inline simdjson_result at(const std::string_view json_pointer) const noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + /** + * Get the value at the given index. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * + * Note: The key will be matched against **unescaped** JSON. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + + /** @private for debugging. Prints out the root element. 
*/ + inline bool dump_raw_tape(std::ostream &out) const noexcept; + +private: + simdjson_inline element(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class document; + friend class object; + friend class array; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; + +}; + +} // namespace dom + +/** The result of a JSON navigation that may fail. */ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::element &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result type() const noexcept; + template + simdjson_inline bool is() const noexcept; + template + simdjson_inline simdjson_result get() const noexcept; + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + simdjson_inline simdjson_result get_array() const noexcept; + simdjson_inline simdjson_result get_object() const noexcept; + simdjson_inline simdjson_result get_c_str() const noexcept; + simdjson_inline simdjson_result get_string_length() const noexcept; + simdjson_inline simdjson_result get_string() const noexcept; + simdjson_inline simdjson_result get_int64() const noexcept; + simdjson_inline simdjson_result get_uint64() const noexcept; + simdjson_inline simdjson_result get_double() const noexcept; + simdjson_inline simdjson_result get_bool() const noexcept; + + simdjson_inline bool is_array() const noexcept; + simdjson_inline bool is_object() const noexcept; + simdjson_inline bool is_string() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_number() const noexcept; + simdjson_inline bool is_bool() const noexcept; + simdjson_inline bool 
is_null() const noexcept; + + simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; + simdjson_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at(size_t index) const noexcept; + simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator bool() const noexcept(false); + simdjson_inline explicit operator const char*() const noexcept(false); + simdjson_inline operator std::string_view() const noexcept(false); + simdjson_inline operator uint64_t() const noexcept(false); + simdjson_inline operator int64_t() const noexcept(false); + simdjson_inline operator double() const noexcept(false); + simdjson_inline operator dom::array() const noexcept(false); + simdjson_inline operator dom::object() const noexcept(false); + + simdjson_inline dom::array::iterator begin() const noexcept(false); + simdjson_inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file include/simdjson/dom/element.h */ +/* begin file include/simdjson/dom/object.h */ +#ifndef SIMDJSON_DOM_OBJECT_H +#define SIMDJSON_DOM_OBJECT_H + + +namespace simdjson { +namespace internal { +template +class string_builder; +} +namespace dom { + +class document; +class element; +class key_value_pair; + +/** + * JSON object. 
+ */ +class object { +public: + /** Create a new, invalid object */ + simdjson_inline object() noexcept; + + class iterator { + public: + using value_type = key_value_pair; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual key/value pair + */ + inline const value_type operator*() const noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator& operator++() noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline std::string_view key() const noexcept; + /** + * Get the length (in bytes) of the key in this key/value pair. + * You should expect this function to be faster than key().size(). + */ + inline uint32_t key_length() const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view. + */ + inline bool key_equals(std::string_view o) const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view in a case-insensitive manner. + * Case comparisons may only be handled correctly for ASCII strings. + */ + inline bool key_equals_case_insensitive(std::string_view o) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline const char *key_c_str() const noexcept; + /** + * Get the value of this key/value pair. 
+ */ + inline element value() const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class object; + }; + + /** + * Return the first key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the object (number of keys). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. 
+ * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("/foo/a/1") == 20 + * obj.at_pointer("/foo")["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * obj.at_pointer("/")["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. 
+ * It is only guaranteed to work over ASCII inputs. + * + * Note: The key will be matched against **unescaped** JSON. + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +private: + simdjson_inline object(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + +/** + * Key/value pair in an object. + */ +class key_value_pair { +public: + /** key in the key-value pair **/ + std::string_view key; + /** value in the key-value pair **/ + element value; + +private: + simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; + friend class object; +}; + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::object value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result operator[](std::string_view key) const noexcept; + inline simdjson_result operator[](const char *key) const noexcept; + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_key(std::string_view key) const noexcept; + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::object::iterator begin() const noexcept(false); + inline dom::object::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_OBJECT_H +/* end file include/simdjson/dom/object.h */ +/* begin file include/simdjson/dom/serialization.h */ +#ifndef SIMDJSON_SERIALIZATION_H +#define SIMDJSON_SERIALIZATION_H + +#include + +namespace simdjson { + +/** + * The string_builder template and mini_formatter class + * are not part of our public API and are subject to change + * at any time! + */ +namespace internal { + +class mini_formatter; + +/** + * @private The string_builder template allows us to construct + * a string from a document element. It is parametrized + * by a "formatter" which handles the details. 
Thus + * the string_builder template could support both minification + * and prettification, and various other tradeoffs. + */ +template +class string_builder { +public: + /** Construct an initially empty builder, would print the empty string **/ + string_builder() = default; + /** Append an element to the builder (to be printed) **/ + inline void append(simdjson::dom::element value); + /** Append an array to the builder (to be printed) **/ + inline void append(simdjson::dom::array value); + /** Append an object to the builder (to be printed) **/ + inline void append(simdjson::dom::object value); + /** Reset the builder (so that it would print the empty string) **/ + simdjson_inline void clear(); + /** + * Get access to the string. The string_view is owned by the builder + * and it is invalid to use it after the string_builder has been + * destroyed. + * However you can make a copy of the string_view on memory that you + * own. + */ + simdjson_inline std::string_view str() const; + /** Append a key_value_pair to the builder (to be printed) **/ + simdjson_inline void append(simdjson::dom::key_value_pair value); +private: + formatter format{}; +}; + +/** + * @private This is the class that we expect to use with the string_builder + * template. It tries to produce a compact version of the JSON element + * as quickly as possible. 
+ */ +class mini_formatter { +public: + mini_formatter() = default; + /** Add a comma **/ + simdjson_inline void comma(); + /** Start an array, prints [ **/ + simdjson_inline void start_array(); + /** End an array, prints ] **/ + simdjson_inline void end_array(); + /** Start an object, prints { **/ + simdjson_inline void start_object(); + /** End an object, prints } **/ + simdjson_inline void end_object(); + /** Prints a true **/ + simdjson_inline void true_atom(); + /** Prints a false **/ + simdjson_inline void false_atom(); + /** Prints a null **/ + simdjson_inline void null_atom(); + /** Prints a number **/ + simdjson_inline void number(int64_t x); + /** Prints a number **/ + simdjson_inline void number(uint64_t x); + /** Prints a number **/ + simdjson_inline void number(double x); + /** Prints a key (string + colon) **/ + simdjson_inline void key(std::string_view unescaped); + /** Prints a string. The string is escaped as needed. **/ + simdjson_inline void string(std::string_view unescaped); + /** Clears out the content. **/ + simdjson_inline void clear(); + /** + * Get access to the buffer, it is owned by the instance, but + * the user can make a copy. + **/ + simdjson_inline std::string_view str() const; + +private: + // implementation details (subject to change) + /** Prints one character **/ + simdjson_inline void one_char(char c); + /** Backing buffer **/ + std::vector buffer{}; // not ideal! +}; + +} // internal + +namespace dom { + +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +} // namespace dom + +/** + * Converts JSON to a string. 
+ * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << to_string(doc) << endl; // prints [1,2,3] + * + */ +template +std::string to_string(T x) { + // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ + // Currently minify and to_string are identical but in the future, they may + // differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} +#if SIMDJSON_EXCEPTIONS +template +std::string to_string(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Minifies a JSON element or document, printing the smallest possible valid JSON. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << minify(doc) << endl; // prints [1,2,3] + * + */ +template +std::string minify(T x) { + return to_string(x); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string minify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + + +} // namespace simdjson + + +#endif +/* end file include/simdjson/dom/serialization.h */ + +// Deprecated API +/* begin file include/simdjson/dom/jsonparser.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_JSONPARSER_H +#define SIMDJSON_DOM_JSONPARSER_H + +/* begin file include/simdjson/dom/parsedjson.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_H +#define SIMDJSON_DOM_PARSEDJSON_H + + +namespace simdjson { + +/** + * @deprecated Use `dom::parser` instead. 
+ */ +using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSEDJSON_H +/* end file include/simdjson/dom/parsedjson.h */ +/* begin file include/simdjson/jsonioutil.h */ +#ifndef SIMDJSON_JSONIOUTIL_H +#define SIMDJSON_JSONIOUTIL_H + + +namespace simdjson { + +#if SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("Use padded_string::load() instead")]] +inline padded_string get_corpus(const char *path) { + return padded_string::load(path); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson + +#endif // SIMDJSON_JSONIOUTIL_H +/* end file include/simdjson/jsonioutil.h */ + +namespace simdjson { + +// +// C API (json_parse and build_parsed_json) declarations +// + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. 
+ // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} + +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { + dom::parser parser; + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +/** @private We do not want to allow implicit conversion from C string to std::string. */ +int json_parse(const char *buf, dom::parser &parser) noexcept = delete; +/** @private We do not want to allow implicit conversion from C string to std::string. */ +dom::parser build_parsed_json(const char *buf) noexcept = delete; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_JSONPARSER_H +/* end file include/simdjson/dom/jsonparser.h */ +/* begin file include/simdjson/dom/parsedjson_iterator.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H + +#include +#include +#include +#include +#include +#include + +/* begin file include/simdjson/internal/jsonformatutils.h */ +#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H +#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H + +#include +#include +#include + +namespace simdjson { +namespace internal { + +class escape_json_string; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); + +class escape_json_string { +public: + escape_json_string(std::string_view _str) noexcept : str{_str} {} + operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } +private: + std::string_view str; + friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +}; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { + for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { + // TODO can this be done once at the beginning, or will it mess up << char? 
+ std::ios::fmtflags f(out.flags()); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); + out.flags(f); + } else { + out << unescaped.str[i]; + } + } + } + return out; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H +/* end file include/simdjson/internal/jsonformatutils.h */ + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { +/** @private **/ +class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { +public: + inline Iterator(const dom::parser &parser) noexcept(false); + inline Iterator(const Iterator &o) noexcept; + inline ~Iterator() noexcept; + + inline Iterator& operator=(const Iterator&) = delete; + + inline bool is_ok() const; + + // useful for debugging purposes + inline size_t get_tape_location() const; + + // useful for debugging purposes + inline size_t get_tape_length() const; + + // returns the current depth (start at 1 with 0 reserved for the fictitious + // root node) + inline size_t get_depth() const; + + // A scope is a series of nodes at the same depth, typically it is either an + // object ({) or an array ([). The root node has type 'r'. + inline uint8_t get_scope_type() const; + + // move forward in document order + inline bool move_forward(); + + // retrieve the character code of what we're looking at: + // [{"slutfn are the possibilities + inline uint8_t get_type() const { + return current_type; // short functions should be inlined! 
+ } + + // get the int64_t value at this node; valid only if get_type is "l" + inline int64_t get_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return static_cast(doc.tape[location + 1]); + } + + // get the value as uint64; valid only if if get_type is "u" + inline uint64_t get_unsigned_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return doc.tape[location + 1]; + } + + // get the string value at this node (NULL ended); valid only if get_type is " + // note that tabs, and line endings are escaped in the returned value (see + // print_with_escapes) return value is valid UTF-8, it may contain NULL chars + // within the string: get_string_length determines the true string length. + inline const char *get_string() const { + return reinterpret_cast( + doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); + } + + // return the length of the string in bytes + inline uint32_t get_string_length() const { + uint32_t answer; + std::memcpy(&answer, + reinterpret_cast(doc.string_buf.get() + + (current_val & internal::JSON_VALUE_MASK)), + sizeof(uint32_t)); + return answer; + } + + // get the double value at this node; valid only if + // get_type() is "d" + inline double get_double() const { + if (location + 1 >= tape_length) { + return std::numeric_limits::quiet_NaN(); // default value in + // case of error + } + double answer; + std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); + return answer; + } + + inline bool is_object_or_array() const { return is_object() || is_array(); } + + inline bool is_object() const { return get_type() == '{'; } + + inline bool is_array() const { return get_type() == '['; } + + inline bool is_string() const { return get_type() == '"'; } + + // Returns true if the current type of the node is an signed integer. + // You can get its value with `get_integer()`. 
+ inline bool is_integer() const { return get_type() == 'l'; } + + // Returns true if the current type of the node is an unsigned integer. + // You can get its value with `get_unsigned_integer()`. + // + // NOTE: + // Only a large value, which is out of range of a 64-bit signed integer, is + // represented internally as an unsigned node. On the other hand, a typical + // positive integer, such as 1, 42, or 1000000, is as a signed node. + // Be aware this function returns false for a signed node. + inline bool is_unsigned_integer() const { return get_type() == 'u'; } + // Returns true if the current type of the node is a double floating-point number. + inline bool is_double() const { return get_type() == 'd'; } + // Returns true if the current type of the node is a number (integer or floating-point). + inline bool is_number() const { + return is_integer() || is_unsigned_integer() || is_double(); + } + // Returns true if the current type of the node is a bool with true value. + inline bool is_true() const { return get_type() == 't'; } + // Returns true if the current type of the node is a bool with false value. + inline bool is_false() const { return get_type() == 'f'; } + // Returns true if the current type of the node is null. + inline bool is_null() const { return get_type() == 'n'; } + // Returns true if the type byte represents an object of an array + static bool is_object_or_array(uint8_t type) { + return ((type == '[') || (type == '{')); + } + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // We seek the key using C's strcmp so if your JSON strings contain + // NULL chars, this would trigger a false positive: if you expect that + // to be the case, take extra precautions. 
+ // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key); + + // as above, but case insensitive lookup (strcmpi instead of strcmp) + inline bool move_to_key_insensitive(const char *key); + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // The string we search for can contain NULL values. + // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key, uint32_t length); + + // when at a key location within an object, this moves to the accompanying + // value (located next to it). This is equivalent but much faster than + // calling "next()". + inline void move_to_value(); + + // when at [, go one level deep, and advance to the given index. + // if successful, we are left pointing at the value, + // if not, we are still pointing at the array ([) + inline bool move_to_index(uint32_t index); + + // Moves the iterator to the value corresponding to the json pointer. + // Always search from the root of the document. + // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer follows the rfc6901 standard's syntax: + // https://tools.ietf.org/html/rfc6901 However, the standard says "If a + // referenced member name is not unique in an object, the member that is + // referenced is undefined, and evaluation fails". Here we just return the + // first corresponding value. The length parameter is the length of the + // jsonpointer string ('pointer'). + inline bool move_to(const char *pointer, uint32_t length); + + // Moves the iterator to the value corresponding to the json pointer. 
+ // Always search from the root of the document. + // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer implementation follows the rfc6901 standard's + // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says + // "If a referenced member name is not unique in an object, the member that + // is referenced is undefined, and evaluation fails". Here we just return + // the first corresponding value. + inline bool move_to(const std::string &pointer) { + return move_to(pointer.c_str(), uint32_t(pointer.length())); + } + + private: + // Almost the same as move_to(), except it searches from the current + // position. The pointer's syntax is identical, though that case is not + // handled by the rfc6901 standard. The '/' is still required at the + // beginning. However, contrary to move_to(), the URI Fragment Identifier + // Representation is not supported here. Also, in case of failure, we are + // left pointing at the closest value it could reach. For these reasons it + // is private. It exists because it is used by move_to(). + inline bool relative_move_to(const char *pointer, uint32_t length); + + public: + // throughout return true if we can do the navigation, false + // otherwise + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move forward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { + // and [. At the object ({) or at the array ([), you can issue a "down" to + // visit their content. valid if we're not at the end of a scope (returns + // true). + inline bool next(); + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move backward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true + // when starting at the end of the scope. 
At the object ({) or at the array + // ([), you can issue a "down" to visit their content. + // Performance warning: This function is implemented by starting again + // from the beginning of the scope and scanning forward. You should expect + // it to be relatively slow. + inline bool prev(); + + // Moves back to either the containing array or object (type { or [) from + // within a contained scope. + // Valid unless we are at the first level of the document + inline bool up(); + + // Valid if we're at a [ or { and it starts a non-empty scope; moves us to + // start of that deeper scope if it not empty. Thus, given [true, null, + // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. + inline bool down(); + + // move us to the start of our current scope, + // a scope is a series of nodes at the same level + inline void to_start_scope(); + + inline void rewind() { + while (up()) + ; + } + + + + // print the node we are currently pointing at + inline bool print(std::ostream &os, bool escape_strings = true) const; + + private: + const document &doc; + size_t max_depth{}; + size_t depth{}; + size_t location{}; // our current location on a tape + size_t tape_length{}; + uint8_t current_type{}; + uint64_t current_val{}; + typedef struct { + size_t start_of_scope; + uint8_t scope_type; + } scopeindex_t; + + scopeindex_t *depth_index{}; +}; + +} // namespace simdjson +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +/* end file include/simdjson/dom/parsedjson_iterator.h */ + +// Inline functions +/* begin file include/simdjson/dom/array-inl.h */ +#ifndef SIMDJSON_INLINE_ARRAY_H +#define SIMDJSON_INLINE_ARRAY_H + +// Inline implementations go in here. 
+ +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} + +namespace dom { + +// +// array inline implementation +// +simdjson_inline array::array() noexcept : tape{} {} +simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline array::iterator array::end() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + 
return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t array::size() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + return tape.scope_count(); +} +inline size_t array::number_of_slots() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + return tape.matching_brace_index() - tape.json_index; +} +inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at(size_t index) const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed array is invalid +#endif + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +// +// array::iterator inline implementation +// +simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline array::iterator array::iterator::operator++(int) noexcept { + array::iterator out = *this; + ++*this; + return out; +} +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool array::iterator::operator==(const array::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool array::iterator::operator<(const 
array::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool array::iterator::operator>(const array::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} + +} // namespace dom + + +} // namespace simdjson + +/* begin file include/simdjson/dom/element-inl.h */ +#ifndef SIMDJSON_INLINE_ELEMENT_H +#define SIMDJSON_INLINE_ELEMENT_H + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::element &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +inline simdjson_result simdjson_result::type() const noexcept { + if (error()) { return error(); } + return first.type(); +} + +template +simdjson_inline bool simdjson_result::is() const noexcept { + return !error() && first.is(); +} +template +simdjson_inline simdjson_result simdjson_result::get() const noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) const noexcept { + if (error()) { return error(); } + return first.get(value); +} + +simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { + if (error()) { return error(); } + return first.get_object(); +} 
+simdjson_inline simdjson_result simdjson_result::get_c_str() const noexcept { + if (error()) { return error(); } + return first.get_c_str(); +} +simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} +simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() const noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { + if (error()) { return error(); } + return first.get_bool(); +} + +simdjson_inline bool simdjson_result::is_array() const noexcept { + return !error() && first.is_array(); +} +simdjson_inline bool simdjson_result::is_object() const noexcept { + return !error() && first.is_object(); +} +simdjson_inline bool simdjson_result::is_string() const noexcept { + return !error() && first.is_string(); +} +simdjson_inline bool simdjson_result::is_int64() const noexcept { + return !error() && first.is_int64(); +} +simdjson_inline bool simdjson_result::is_uint64() const noexcept { + return !error() && first.is_uint64(); +} +simdjson_inline bool simdjson_result::is_double() const noexcept { + return !error() && first.is_double(); +} +simdjson_inline bool simdjson_result::is_number() const noexcept { + return !error() && first.is_number(); +} +simdjson_inline bool simdjson_result::is_bool() const noexcept { + return !error() && first.is_bool(); +} + +simdjson_inline bool simdjson_result::is_null() const noexcept { + return 
!error() && first.is_null(); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + if (error()) { return error(); } + return first.at(json_pointer); +SIMDJSON_POP_DISABLE_WARNINGS +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +simdjson_inline simdjson_result::operator bool() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator const char *() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { + return get(); +} +simdjson_inline 
simdjson_result::operator int64_t() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator double() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::array() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::object() const noexcept(false) { + return get(); +} + +simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// element inline implementation +// +simdjson_inline element::element() noexcept : tape{} {} +simdjson_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } + +inline element_type element::type() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + auto tape_type = tape.tape_ref_type(); + return tape_type == internal::tape_type::FALSE_VALUE ? 
element_type::BOOL : static_cast(tape_type); +} + +inline simdjson_result element::get_bool() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + if(tape.is_true()) { + return true; + } else if(tape.is_false()) { + return false; + } + return INCORRECT_TYPE; +} +inline simdjson_result element::get_c_str() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_c_str(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string_length() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: + return tape.get_string_view(); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_uint64() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken + if(tape.is_int64()) { + int64_t result = tape.next_tape_value(); + if (result < 0) { + return NUMBER_OUT_OF_RANGE; + } + return 
uint64_t(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_int64() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken + if(tape.is_uint64()) { + uint64_t result = tape.next_tape_value(); + // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std + if (result > uint64_t((std::numeric_limits::max)())) { + return NUMBER_OUT_OF_RANGE; + } + return static_cast(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_double() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + // Performance considerations: + // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight + // comparison. + // 2. Using a switch-case relies on the compiler guessing what kind of code generation + // we want... But the compiler cannot know that we expect the type to be "double" + // most of the time. + // We can expect get to refer to a double type almost all the time. + // It is important to craft the code accordingly so that the compiler can use this + // information. (This could also be solved with profile-guided optimization.) 
+ if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken + if(tape.is_uint64()) { + return double(tape.next_tape_value()); + } else if(tape.is_int64()) { + return double(tape.next_tape_value()); + } + return INCORRECT_TYPE; + } + // this is common: + return tape.next_tape_value(); +} +inline simdjson_result element::get_array() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::START_ARRAY: + return array(tape); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_object() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape); + default: + return INCORRECT_TYPE; + } +} + +template +simdjson_warn_unused simdjson_inline error_code element::get(T &value) const noexcept { + return get().get(value); +} +// An element-specific version prevents recursion with simdjson_result::get(value) +template<> +simdjson_warn_unused simdjson_inline error_code element::get(element &value) const noexcept { + value = element(tape); + return SUCCESS; +} +template +inline void element::tie(T &value, error_code &error) && noexcept { + error = get(value); +} + +template +simdjson_inline bool element::is() const noexcept { + auto result = get(); + return !result.error(); +} + +template<> inline simdjson_result element::get() const noexcept { return get_array(); } +template<> inline simdjson_result element::get() const noexcept { return get_object(); } +template<> inline simdjson_result element::get() const noexcept { return get_c_str(); } +template<> inline simdjson_result element::get() const noexcept { return get_string(); } 
+template<> inline simdjson_result element::get() const noexcept { return get_int64(); } +template<> inline simdjson_result element::get() const noexcept { return get_uint64(); } +template<> inline simdjson_result element::get() const noexcept { return get_double(); } +template<> inline simdjson_result element::get() const noexcept { return get_bool(); } + +inline bool element::is_array() const noexcept { return is(); } +inline bool element::is_object() const noexcept { return is(); } +inline bool element::is_string() const noexcept { return is(); } +inline bool element::is_int64() const noexcept { return is(); } +inline bool element::is_uint64() const noexcept { return is(); } +inline bool element::is_double() const noexcept { return is(); } +inline bool element::is_bool() const noexcept { return is(); } +inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); } + +inline bool element::is_null() const noexcept { + return tape.is_null_on_tape(); +} + +#if SIMDJSON_EXCEPTIONS + +inline element::operator bool() const noexcept(false) { return get(); } +inline element::operator const char*() const noexcept(false) { return get(); } +inline element::operator std::string_view() const noexcept(false) { return get(); } +inline element::operator uint64_t() const noexcept(false) { return get(); } +inline element::operator int64_t() const noexcept(false) { return get(); } +inline element::operator double() const noexcept(false) { return get(); } +inline element::operator array() const noexcept(false) { return get(); } +inline element::operator object() const noexcept(false) { return get(); } + +inline array::iterator element::begin() const noexcept(false) { + return get().begin(); +} +inline array::iterator element::end() const noexcept(false) { + return get().end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result element::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result 
element::operator[](const char *key) const noexcept { + return at_key(key); +} + +inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_pointer(json_pointer); + case internal::tape_type::START_ARRAY: + return array(tape).at_pointer(json_pointer); + default: { + if(!json_pointer.empty()) { // a non-empty string is invalid on an atom + return INVALID_JSON_POINTER; + } + // an empty string means that we return the current node + dom::element copy(*this); + return simdjson_result(std::move(copy)); + } + } +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +inline simdjson_result element::at(std::string_view json_pointer) const noexcept { + // version 0.4 of simdjson allowed non-compliant pointers + auto std_pointer = (json_pointer.empty() ? 
"" : "/") + std::string(json_pointer.begin(), json_pointer.end()); + return at_pointer(std_pointer); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline simdjson_result element::at(size_t index) const noexcept { + return get().at(index); +} +inline simdjson_result element::at_key(std::string_view key) const noexcept { + return get().at_key(key); +} +inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { + return get().at_key_case_insensitive(key); +} + +inline bool element::dump_raw_tape(std::ostream &out) const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed element is invalid +#endif + return tape.doc->dump_raw_tape(out); +} + + +inline std::ostream& operator<<(std::ostream& out, element_type type) { + switch (type) { + case element_type::ARRAY: + return out << "array"; + case element_type::OBJECT: + return out << "object"; + case element_type::INT64: + return out << "int64_t"; + case element_type::UINT64: + return out << "uint64_t"; + case element_type::DOUBLE: + return out << "double"; + case element_type::STRING: + return out << "string"; + case element_type::BOOL: + return out << "bool"; + case element_type::NULL_VALUE: + return out << "null"; + default: + return out << "unexpected content!!!"; // abort() usage is forbidden in the library + } +} + +} // namespace dom + +} // namespace simdjson + +#endif // SIMDJSON_INLINE_ELEMENT_H +/* end file include/simdjson/dom/element-inl.h */ + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_INLINE_ARRAY_H +/* end file include/simdjson/dom/array-inl.h */ +/* begin file include/simdjson/dom/document_stream-inl.h */ 
+#ifndef SIMDJSON_INLINE_DOCUMENT_STREAM_H +#define SIMDJSON_INLINE_DOCUMENT_STREAM_H + +#include +#include +#include +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. 
+ // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} +#endif + +simdjson_inline document_stream::document_stream( + dom::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change +#endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) +#endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept { +#ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); +#endif +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. + return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { + // Note that in case of error, we do not yet mark + // the iterator as "finished": this detection is done + // in the operator++ function since it is possible + // to call operator++ repeatedly while omitting + // calls to operator*. + if (stream->error) { return stream->error; } + return stream->parser->doc.root(); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) 
+ // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->ensure_capacity(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } +#ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread if needed + error = stage1_thread_parser.ensure_capacity(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } +#endif // SIMDJSON_THREADS_ENABLED + next(); +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + 
return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + const char* start = reinterpret_cast(stream->buf) + current_index(); + bool object_or_array = ((*start == '[') || (*start == '{')); + if(object_or_array) { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; + return std::string_view(start, next_doc_index - current_index() + 1); + } else { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; + return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + } +} + + +inline void document_stream::next() noexcept { + // We always exit at once, once in an error condition. + if (error) { return; } + + // Load the next document from the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + // If that was the last document in the batch, load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + +#ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } +#else + error = run_stage1(*parser, batch_start); +#endif + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
+ // Run stage 2 on the first document in the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + } +} +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(*parser, stage1_thread_parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +simdjson_inline simdjson_result::simdjson_result() noexcept + : simdjson_result_base() { +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : simdjson_result_base(error) { +} +simdjson_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept + : simdjson_result_base(std::forward(value)) { +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { + first.error = error(); + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { + first.error = error(); + return first.end(); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson +#endif // SIMDJSON_INLINE_DOCUMENT_STREAM_H +/* end file include/simdjson/dom/document_stream-inl.h */ +/* begin file include/simdjson/dom/document-inl.h */ +#ifndef SIMDJSON_INLINE_DOCUMENT_H +#define SIMDJSON_INLINE_DOCUMENT_H + +// Inline implementations go in here. 
+ +#include +#include + +namespace simdjson { +namespace dom { + +// +// document inline implementation +// +inline element document::root() const noexcept { + return element(internal::tape_ref(this, 1)); +} +simdjson_warn_unused +inline size_t document::capacity() const noexcept { + return allocated_capacity; +} + +simdjson_warn_unused +inline error_code document::allocate(size_t capacity) noexcept { + if (capacity == 0) { + string_buf.reset(); + tape.reset(); + allocated_capacity = 0; + return SUCCESS; + } + + // a pathological input like "[[[[..." would generate capacity tape elements, so + // need a capacity of at least capacity + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + //where capacity + 1 tape elements are + // generated, see issue https://github.com/simdjson/simdjson/issues/345 + size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); + // a document with only zero-length strings... could have capacity/3 string + // and we would need capacity/3 * 5 bytes on the string buffer + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); + tape.reset(new (std::nothrow) uint64_t[tape_capacity]); + if(!(string_buf && tape)) { + allocated_capacity = 0; + string_buf.reset(); + tape.reset(); + return MEMALLOC; + } + // Technically the allocated_capacity might be larger than capacity + // so the next line is pessimistic. + allocated_capacity = capacity; + return SUCCESS; +} + +inline bool document::dump_raw_tape(std::ostream &os) const noexcept { + uint32_t string_length; + size_t tape_idx = 0; + uint64_t tape_val = tape[tape_idx]; + uint8_t type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type; + tape_idx++; + size_t how_many = 0; + if (type == 'r') { + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); + } else { + // Error: no starting root node? 
+ return false; + } + os << "\t// pointing to " << how_many << " (right after last node)\n"; + uint64_t payload; + for (; tape_idx < how_many; tape_idx++) { + os << tape_idx << " : "; + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + os << internal::escape_json_string(std::string_view( + reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), + string_length + )); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tape_idx + 1 >= how_many) { + return false; + } + os << "integer " << static_cast(tape[++tape_idx]) << "\n"; + break; + case 'u': // we have a long uint + if (tape_idx + 1 >= how_many) { + return false; + } + os << "unsigned integer " << tape[++tape_idx] << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tape_idx + 1 >= how_many) { + return false; + } + double answer; + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; case '}': // we end an object + os << "}\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; + case ']': // we end an array + os << "]\t// pointing to 
previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case 'r': // we start and end with the root node + // should we be hitting the root node? + return false; + default: + return false; + } + } + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type << "\t// pointing to " << payload + << " (start root)\n"; + return true; +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_INLINE_DOCUMENT_H +/* end file include/simdjson/dom/document-inl.h */ +/* begin file include/simdjson/dom/object-inl.h */ +#ifndef SIMDJSON_INLINE_OBJECT_H +#define SIMDJSON_INLINE_OBJECT_H + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if 
SIMDJSON_EXCEPTIONS + +inline dom::object::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::object::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// object inline implementation +// +simdjson_inline object::object() noexcept : tape{} {} +simdjson_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline object::iterator object::begin() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed object is invalid +#endif + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline object::iterator object::end() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed object is invalid +#endif + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t object::size() const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed object is invalid +#endif + return tape.scope_count(); +} + +inline simdjson_result object::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result object::operator[](const char *key) const noexcept { + return at_key(key); +} +inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + // issue https://github.com/simdjson/simdjson/issues/1914 + assert (tape.usable()); // the default constructed 
object is invalid +#endif + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = at_key(unescaped); + } else { + child = at_key(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_key(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} +// In case you wonder why we need this, please see +// https://github.com/simdjson/simdjson/issues/323 +// People do seek keys in a case-insensitive manner. 
+inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals_case_insensitive(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} + +// +// object::iterator inline implementation +// +simdjson_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline const key_value_pair object::iterator::operator*() const noexcept { + return key_value_pair(key(), value()); +} +inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool object::iterator::operator==(const object::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool object::iterator::operator<(const object::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool object::iterator::operator>(const object::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} +inline object::iterator& object::iterator::operator++() noexcept { + tape.json_index++; + tape.json_index = tape.after_element(); + return *this; +} +inline object::iterator object::iterator::operator++(int) noexcept { + object::iterator out = *this; + ++*this; + return out; +} +inline std::string_view object::iterator::key() const noexcept { + return tape.get_string_view(); +} +inline uint32_t object::iterator::key_length() const noexcept { + return tape.get_string_length(); +} +inline const char* object::iterator::key_c_str() const noexcept { + return 
reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); +} +inline element object::iterator::value() const noexcept { + return element(internal::tape_ref(tape.doc, tape.json_index + 1)); +} + +/** + * Design notes: + * Instead of constructing a string_view and then comparing it with a + * user-provided strings, it is probably more performant to have dedicated + * functions taking as a parameter the string we want to compare against + * and return true when they are equal. That avoids the creation of a temporary + * std::string_view. Though it is possible for the compiler to avoid entirely + * any overhead due to string_view, relying too much on compiler magic is + * problematic: compiler magic sometimes fail, and then what do you do? + * Also, enticing users to rely on high-performance function is probably better + * on the long run. + */ + +inline bool object::iterator::key_equals(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // We avoid construction of a temporary string_view instance. + return (memcmp(o.data(), key_c_str(), len) == 0); + } + return false; +} + +inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // See For case-insensitive string comparisons, avoid char-by-char functions + // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ + // Note that it might be worth rolling our own strncasecmp function, with vectorization. 
+ return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); + } + return false; +} +// +// key_value_pair inline implementation +// +inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : + key(_key), value(_value) {} + +} // namespace dom + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_INLINE_OBJECT_H +/* end file include/simdjson/dom/object-inl.h */ +/* begin file include/simdjson/dom/parsedjson_iterator-inl.h */ +#ifndef SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H +#define SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H + +#include + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { + +// VS2017 reports deprecated warnings when you define a deprecated class's methods. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + +// Because of template weirdness, the actual class definition is inline in the document class +simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { + return location < tape_length; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_location() const { + return location; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_length() const { + return tape_length; +} + +// returns the current depth (start at 1 with 0 reserved for the fictitious root +// node) +size_t dom::parser::Iterator::get_depth() const { + return depth; +} + +// A scope is a series of nodes at the same depth, typically it is either an +// object ({) or an array ([). The root node has type 'r'. 
+uint8_t dom::parser::Iterator::get_scope_type() const { + return depth_index[depth].scope_type; +} + +bool dom::parser::Iterator::move_forward() { + if (location + 1 >= tape_length) { + return false; // we are at the end! + } + + if ((current_type == '[') || (current_type == '{')) { + // We are entering a new scope + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } else if ((current_type == ']') || (current_type == '}')) { + // Leaving a scope. + depth--; + } else if (is_number()) { + // these types use 2 locations on the tape, not just one. + location += 1; + } + + location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +void dom::parser::Iterator::move_to_value() { + // assume that we are on a key, so move by 1. + location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +bool dom::parser::Iterator::move_to_key(const char *key) { + if (down()) { + do { + const bool right_key = (strcmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key_insensitive( + const char *key) { + if (down()) { + do { + const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key(const char *key, + uint32_t length) { + if (down()) { + do { + bool right_key = ((get_string_length() == length) && + (memcmp(get_string(), key, length) == 0)); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_index(uint32_t index) { + if (down()) { + uint32_t i = 0; + for (; i < index; i++) { + if (!next()) { + break; + } + } + if (i == index) { + 
return true; + } + up(); + } + return false; +} + +bool dom::parser::Iterator::prev() { + size_t target_location = location; + to_start_scope(); + size_t npos = location; + if (target_location == npos) { + return false; // we were already at the start + } + size_t oldnpos; + // we have that npos < target_location here + do { + oldnpos = npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + } while (npos < target_location); + location = oldnpos; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::up() { + if (depth == 1) { + return false; // don't allow moving back to root + } + to_start_scope(); + // next we just move to the previous value + depth--; + location -= 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::down() { + if (location + 1 >= tape_length) { + return false; + } + if ((current_type == '[') || (current_type == '{')) { + size_t npos = uint32_t(current_val); + if (npos == location + 2) { + return false; // we have an empty scope + } + depth++; + assert(depth < max_depth); + location = location + 1; + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; + } + return false; +} + +void dom::parser::Iterator::to_start_scope() { + location = depth_index[depth].start_of_scope; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +bool dom::parser::Iterator::next() { + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = location + (is_number() ? 
2 : 1); + } + uint64_t next_val = doc.tape[npos]; + uint8_t next_type = uint8_t(next_val >> 56); + if ((next_type == ']') || (next_type == '}')) { + return false; // we reached the end of the scope + } + location = npos; + current_val = next_val; + current_type = next_type; + return true; +} +dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) + : doc(pj.doc) +{ +#if SIMDJSON_EXCEPTIONS + if (!pj.valid) { throw simdjson_error(pj.error); } +#else + if (!pj.valid) { return; } // abort() usage is forbidden in the library +#endif + + max_depth = pj.max_depth(); + depth_index = new scopeindex_t[max_depth + 1]; + depth_index[0].start_of_scope = location; + current_val = doc.tape[location++]; + current_type = uint8_t(current_val >> 56); + depth_index[0].scope_type = current_type; + tape_length = size_t(current_val & internal::JSON_VALUE_MASK); + if (location < tape_length) { + // If we make it here, then depth_capacity must >=2, but the compiler + // may not know this. + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } +} +dom::parser::Iterator::Iterator( + const dom::parser::Iterator &o) noexcept + : doc(o.doc), + max_depth(o.depth), + depth(o.depth), + location(o.location), + tape_length(o.tape_length), + current_type(o.current_type), + current_val(o.current_val) +{ + depth_index = new scopeindex_t[max_depth+1]; + std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); +} + +dom::parser::Iterator::~Iterator() noexcept { + if (depth_index) { delete[] depth_index; } +} + +bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { + if (!is_ok()) { + return false; + } + switch (current_type) { + case '"': // we have a string + os << '"'; + if (escape_strings) { + os << internal::escape_json_string(std::string_view(get_string(), 
get_string_length())); + } else { + // was: os << get_string();, but given that we can include null chars, we + // have to do something crazier: + std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); + } + os << '"'; + break; + case 'l': // we have a long int + os << get_integer(); + break; + case 'u': + os << get_unsigned_integer(); + break; + case 'd': + os << get_double(); + break; + case 'n': // we have a null + os << "null"; + break; + case 't': // we have a true + os << "true"; + break; + case 'f': // we have a false + os << "false"; + break; + case '{': // we have an object + case '}': // we end an object + case '[': // we start an array + case ']': // we end an array + os << char(current_type); + break; + default: + return false; + } + return true; +} + +bool dom::parser::Iterator::move_to(const char *pointer, + uint32_t length) { + char *new_pointer = nullptr; + if (pointer[0] == '#') { + // Converting fragment representation to string representation + new_pointer = new char[length]; + uint32_t new_length = 0; + for (uint32_t i = 1; i < length; i++) { + if (pointer[i] == '%' && pointer[i + 1] == 'x') { +#if __cpp_exceptions + try { +#endif + int fragment = + std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); + if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { + // escaping the character + new_pointer[new_length] = '\\'; + new_length++; + } + new_pointer[new_length] = char(fragment); + i += 3; +#if __cpp_exceptions + } catch (std::invalid_argument &) { + delete[] new_pointer; + return false; // the fragment is invalid + } +#endif + } else { + new_pointer[new_length] = pointer[i]; + } + new_length++; + } + length = new_length; + pointer = new_pointer; + } + + // saving the current state + size_t depth_s = depth; + size_t location_s = location; + uint8_t current_type_s = current_type; + uint64_t current_val_s = current_val; + + rewind(); // The json pointer is used from the root of the document. 
+ + bool found = relative_move_to(pointer, length); + delete[] new_pointer; + + if (!found) { + // since the pointer has found nothing, we get back to the original + // position. + depth = depth_s; + location = location_s; + current_type = current_type_s; + current_val = current_val_s; + } + + return found; +} + +bool dom::parser::Iterator::relative_move_to(const char *pointer, + uint32_t length) { + if (length == 0) { + // returns the whole document + return true; + } + + if (pointer[0] != '/') { + // '/' must be the first character + return false; + } + + // finding the key in an object or the index in an array + std::string key_or_index; + uint32_t offset = 1; + + // checking for the "-" case + if (is_array() && pointer[1] == '-') { + if (length != 2) { + // the pointer must be exactly "/-" + // there can't be anything more after '-' as an index + return false; + } + key_or_index = '-'; + offset = length; // will skip the loop coming right after + } + + // We either transform the first reference token to a valid json key + // or we make sure it is a valid index in an array. + for (; offset < length; offset++) { + if (pointer[offset] == '/') { + // beginning of the next key or index + break; + } + if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { + // the index of an array must be an integer + // we also make sure std::stoi won't discard whitespaces later + return false; + } + if (pointer[offset] == '~') { + // "~1" represents "/" + if (pointer[offset + 1] == '1') { + key_or_index += '/'; + offset++; + continue; + } + // "~0" represents "~" + if (pointer[offset + 1] == '0') { + key_or_index += '~'; + offset++; + continue; + } + } + if (pointer[offset] == '\\') { + if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || + (pointer[offset + 1] <= 0x1F)) { + key_or_index += pointer[offset + 1]; + offset++; + continue; + } + return false; // invalid escaped character + } + if (pointer[offset] == '\"') { + // unescaped quote character. 
this is an invalid case. + // lets do nothing and assume most pointers will be valid. + // it won't find any corresponding json key anyway. + // return false; + } + key_or_index += pointer[offset]; + } + + bool found = false; + if (is_object()) { + if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { + found = relative_move_to(pointer + offset, length - offset); + } + } else if (is_array()) { + if (key_or_index == "-") { // handling "-" case first + if (down()) { + while (next()) + ; // moving to the end of the array + // moving to the nonexistent value right after... + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = + location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + location = npos; + current_val = doc.tape[npos]; + current_type = uint8_t(current_val >> 56); + return true; // how could it fail ? + } + } else { // regular numeric index + // The index can't have a leading '0' + if (key_or_index[0] == '0' && key_or_index.length() > 1) { + return false; + } + // it cannot be empty + if (key_or_index.length() == 0) { + return false; + } + // we already checked the index contains only valid digits + uint32_t index = std::stoi(key_or_index); + if (move_to_index(index)) { + found = relative_move_to(pointer + offset, length - offset); + } + } + } + + return found; +} + +SIMDJSON_POP_DISABLE_WARNINGS +} // namespace simdjson + +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + +#endif // SIMDJSON_INLINE_PARSEDJSON_ITERATOR_H +/* end file include/simdjson/dom/parsedjson_iterator-inl.h */ +/* begin file include/simdjson/dom/parser-inl.h */ +#ifndef SIMDJSON_INLINE_PARSER_H +#define SIMDJSON_INLINE_PARSER_H + +#include +#include + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} 
+simdjson_inline parser::parser(parser &&other) noexcept = default; +simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } + +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? doc.dump_raw_tape(os) : false; +} + +inline simdjson_result parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if defined(SIMDJSON_VISUAL_STUDIO) && !SIMDJSON_IS_32BITS + __int64 len = _ftelli64(fp); + if(len == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result parser::load(const std::string &path) & noexcept { + size_t len; + auto _error = 
read_file(path).get(len); + if (_error) { return _error; } + return parse(loaded_bytes.get(), len, false); +} + +inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); +} + +inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + // Important: we need to ensure that document has enough capacity. + // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! + error_code _error = ensure_capacity(provided_doc, len); + if (_error) { return _error; } + if (realloc_if_needed) { + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); + } + _error = implementation->parse(realloc_if_needed ? 
reinterpret_cast(loaded_bytes.get()): buf, len, provided_doc); + + if (_error) { return _error; } + + return provided_doc.root(); +} + +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), false); +} + + +inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(doc, buf, len, realloc_if_needed); +} + +simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse(reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} + +inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { + return 
parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +simdjson_warn_unused +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation if needed + // + error_code err; + if (implementation) { + err = implementation->allocate(capacity, max_depth); + } else { + err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + return SUCCESS; +} + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_warn_unused +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + return ensure_capacity(doc, desired_capacity); +} + + +inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { + // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. + // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. + if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } + // If we don't have enough capacity, (try to) automatically bump it. + // If the document needs allocation, do it too. + // Both in one if statement to minimize unlikely branching. 
+ // + // Note: we must make sure that this function is called if capacity() == 0. We do so because we + // ensure that desired_capacity > 0. + if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; + error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; + if(err1 != SUCCESS) { return error = err1; } + if(err2 != SUCCESS) { return error = err2; } + } + return SUCCESS; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_INLINE_PARSER_H +/* end file include/simdjson/dom/parser-inl.h */ +/* begin file include/simdjson/internal/tape_ref-inl.h */ +#ifndef SIMDJSON_INLINE_TAPE_REF_H +#define SIMDJSON_INLINE_TAPE_REF_H + +#include + +namespace simdjson { +namespace internal { + +// +// tape_ref inline implementation +// +simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + + +simdjson_inline bool tape_ref::is_document_root() const noexcept { + return json_index == 1; // should we ever change the structure of the tape, this should get updated. +} +simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). +} +// Some value types have a specific on-tape word value. 
It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. + +simdjson_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +simdjson_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +simdjson_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +simdjson_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +simdjson_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast(doc->tape[json_index] >> 56); +} +simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +simdjson_inline uint32_t internal::tape_ref::scope_count() const 
noexcept { + return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); +} + +template +simdjson_inline T tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. + T x; + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INLINE_TAPE_REF_H +/* end file include/simdjson/internal/tape_ref-inl.h */ +/* begin file include/simdjson/dom/serialization-inl.h */ + +#ifndef SIMDJSON_SERIALIZATION_INL_H +#define SIMDJSON_SERIALIZATION_INL_H + + +#include +#include + +namespace simdjson { +namespace dom { +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + simdjson::internal::string_builder<> sb; + sb.append(doc.root()); + std::string_view answer = sb.str(); + os << answer; + return true; +} +} +/*** + * Number utility functions + **/ + + +namespace { +/**@private + * Escape sequence like \b or \u0001 + * We expect that most compilers will use 8 bytes for this data structure. 
/**@private
 * Writes the decimal representation of a signed 64-bit integer to `output`.
 * No terminating null character is written.
 * The caller is responsible for providing enough memory (at least
 * 20 characters: a sign followed by up to 19 digits).
 * Runtime libraries offer itoa-like functions, but they are not part of the
 * C++ standard; C++17 has to_chars, but we want to support C++11.
 *
 * @return a pointer to the position right after the last character written
 */
char *fast_itoa(char *output, int64_t value) noexcept {
  // Magnitude of the value. Negating a signed integer is unsafe in general,
  // so the negation is carried out on the unsigned representation, where it
  // wraps around instead of overflowing (this also covers INT64_MIN).
  uint64_t magnitude = uint64_t(value);
  if (value < 0) {
    *output++ = '-';
    magnitude = uint64_t(0) - magnitude;
  }
  // Digits come out least significant first: they are stored from the end of
  // a scratch buffer, then the used part is copied to the output.
  char scratch[20];
  char *const scratch_end = scratch + sizeof(scratch);
  char *first_digit = scratch_end;
  do {
    *--first_digit = char('0' + (magnitude % 10));
    magnitude /= 10;
  } while (magnitude != 0);
  const size_t digit_count = size_t(scratch_end - first_digit);
  std::memcpy(output, first_digit, digit_count);
  return output + digit_count;
}
/**@private
 * This converts an unsigned integer into a character sequence.
 * No terminating null character is written.
 * The caller is responsible for providing enough memory (at least
 * 20 characters: the largest 64-bit value, 18446744073709551615, has
 * 20 digits.)
 * Though various runtime libraries provide itoa functions,
 * it is not part of the C++ standard. The C++17 standard
 * adds the to_chars functions which would do as well, but
 * we want to support C++11.
 *
 * @return a pointer to the position right after the last character written
 */
char *fast_itoa(char *output, uint64_t value) noexcept {
  // This is a standard implementation of itoa.
  char buffer[20];
  const char *const end_buffer = buffer + 20;
  // digits are produced least significant first, so we fill the buffer
  // starting from its last position
  char *write_pointer = buffer + 19;
  // A faster approach is possible if we expect large integers:
  // unroll the loop (work in 100s, 1000s) and use some kind of
  // memoization.
  while(value >= 10) {
    *write_pointer-- = char('0' + (value % 10));
    value /= 10;
  }
  *write_pointer = char('0' + value);
  size_t len = size_t(end_buffer - write_pointer);
  std::memcpy(output, write_pointer, len);
  return output + len;
}
+ char *newp = internal::to_chars(number_buffer, nullptr, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +simdjson_inline void mini_formatter::start_array() { one_char('['); } +simdjson_inline void mini_formatter::end_array() { one_char(']'); } +simdjson_inline void mini_formatter::start_object() { one_char('{'); } +simdjson_inline void mini_formatter::end_object() { one_char('}'); } +simdjson_inline void mini_formatter::comma() { one_char(','); } + + +simdjson_inline void mini_formatter::true_atom() { + const char * s = "true"; + buffer.insert(buffer.end(), s, s + 4); +} +simdjson_inline void mini_formatter::false_atom() { + const char * s = "false"; + buffer.insert(buffer.end(), s, s + 5); +} +simdjson_inline void mini_formatter::null_atom() { + const char * s = "null"; + buffer.insert(buffer.end(), s, s + 4); +} +simdjson_inline void mini_formatter::one_char(char c) { buffer.push_back(c); } +simdjson_inline void mini_formatter::key(std::string_view unescaped) { + string(unescaped); + one_char(':'); +} +simdjson_inline void mini_formatter::string(std::string_view unescaped) { + one_char('\"'); + size_t i = 0; + // Fast path for the case where we have no control character, no ", and no backslash. + // This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. 
+ constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for(;i + 8 <= unescaped.length(); i += 8) { + // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] + ) { break; } + } + for(;i < unescaped.length(); i++) { + if(needs_escaping[uint8_t(unescaped[i])]) { break; } + } + // The following is also possible and omits a 256-byte table, but it is slower: + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} + + // At least for long strings, the following should be fast. We could + // do better by integrating the checks and the insertion. + buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + // We caught a control character if we enter this loop (slow). 
+ // Note that we are do not restart from the beginning, but rather we continue + // from the point where we encountered something that requires escaping. + for (; i < unescaped.length(); i++) { + switch (unescaped[i]) { + case '\"': + { + const char * s = "\\\""; + buffer.insert(buffer.end(), s, s + 2); + } + break; + case '\\': + { + const char * s = "\\\\"; + buffer.insert(buffer.end(), s, s + 2); + } + break; + default: + if (uint8_t(unescaped[i]) <= 0x1F) { + // If packed, this uses 8 * 32 bytes. + // Note that we expect most compilers to embed this code in the data + // section. + constexpr static escape_sequence escaped[32] = { + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + auto u = escaped[uint8_t(unescaped[i])]; + buffer.insert(buffer.end(), u.string, u.string + u.length); + } else { + one_char(unescaped[i]); + } + } // switch + } // for + one_char('\"'); +} + +inline void mini_formatter::clear() { + buffer.clear(); +} + +simdjson_inline std::string_view mini_formatter::str() const { + return std::string_view(buffer.data(), buffer.size()); +} + + +/*** + * String building code. 
+ **/ + +template +inline void string_builder::append(simdjson::dom::element value) { + // using tape_type = simdjson::internal::tape_type; + size_t depth = 0; + constexpr size_t MAX_DEPTH = 16; + bool is_object[MAX_DEPTH]; + is_object[0] = false; + bool after_value = false; + + internal::tape_ref iter(value.tape); + do { + // print commas after each value + if (after_value) { + format.comma(); + } + // If we are in an object, print the next key and :, and skip to the next + // value. + if (is_object[depth]) { + format.key(iter.get_string_view()); + iter.json_index++; + } + switch (iter.tape_ref_type()) { + + // Arrays + case tape_type::START_ARRAY: { + // If we're too deep, we need to recurse to go deeper. + depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::array(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the ] + depth--; + break; + } + + // Output start [ + format.start_array(); + iter.json_index++; + + // Handle empty [] (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + depth--; + break; + } + + is_object[depth] = false; + after_value = false; + continue; + } + + // Objects + case tape_type::START_OBJECT: { + // If we're too deep, we need to recurse to go deeper. 
+ depth++; + if (simdjson_unlikely(depth >= MAX_DEPTH)) { + append(simdjson::dom::object(iter)); + iter.json_index = iter.matching_brace_index() - 1; // Jump to the } + depth--; + break; + } + + // Output start { + format.start_object(); + iter.json_index++; + + // Handle empty {} (we don't want to come back around and print commas) + if (iter.tape_ref_type() == tape_type::END_OBJECT) { + format.end_object(); + depth--; + break; + } + + is_object[depth] = true; + after_value = false; + continue; + } + + // Scalars + case tape_type::STRING: + format.string(iter.get_string_view()); + break; + case tape_type::INT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::UINT64: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::DOUBLE: + format.number(iter.next_tape_value()); + iter.json_index++; // numbers take up 2 spots, so we need to increment + // extra + break; + case tape_type::TRUE_VALUE: + format.true_atom(); + break; + case tape_type::FALSE_VALUE: + format.false_atom(); + break; + case tape_type::NULL_VALUE: + format.null_atom(); + break; + + // These are impossible + case tape_type::END_ARRAY: + case tape_type::END_OBJECT: + case tape_type::ROOT: + SIMDJSON_UNREACHABLE(); + } + iter.json_index++; + after_value = true; + + // Handle multiple ends in a row + while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY || + iter.tape_ref_type() == tape_type::END_OBJECT)) { + if (iter.tape_ref_type() == tape_type::END_ARRAY) { + format.end_array(); + } else { + format.end_object(); + } + depth--; + iter.json_index++; + } + + // Stop when we're at depth 0 + } while (depth != 0); +} + +template +inline void string_builder::append(simdjson::dom::object value) { + format.start_object(); + auto pair = value.begin(); + auto end = value.end(); + if (pair != end) { + 
append(*pair); + for (++pair; pair != end; ++pair) { + format.comma(); + append(*pair); + } + } + format.end_object(); +} + +template +inline void string_builder::append(simdjson::dom::array value) { + format.start_array(); + auto iter = value.begin(); + auto end = value.end(); + if (iter != end) { + append(*iter); + for (++iter; iter != end; ++iter) { + format.comma(); + append(*iter); + } + } + format.end_array(); +} + +template +simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { + format.key(kv.key); + append(kv.value); +} + +template +simdjson_inline void string_builder::clear() { + format.clear(); +} + +template +simdjson_inline std::string_view string_builder::str() const { + return format.str(); +} + + +} // namespace internal +} // namespace simdjson + +#endif +/* end file include/simdjson/dom/serialization-inl.h */ + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_DOM_H +/* end file include/simdjson/dom.h */ +/* begin file include/simdjson/builtin.h */ +#ifndef SIMDJSON_BUILTIN_H +#define SIMDJSON_BUILTIN_H + +/* begin file include/simdjson/implementations.h */ +#ifndef SIMDJSON_IMPLEMENTATIONS_H +#define SIMDJSON_IMPLEMENTATIONS_H + +/* begin file include/simdjson/implementation-base.h */ +#ifndef SIMDJSON_IMPLEMENTATION_BASE_H +#define SIMDJSON_IMPLEMENTATION_BASE_H + +/** + * @file + * + * Includes common stuff needed for implementations. + */ + + +// Implementation-internal files (must be included before the implementations themselves, to keep +// amalgamation working--otherwise, the first time a file is included, it might be put inside the +// #ifdef SIMDJSON_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other implementations can't +// compile unless that implementation is turned on). 
+/* begin file include/simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file include/simdjson/internal/jsoncharutils_tables.h */ +/* begin file include/simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. 
+ * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file include/simdjson/internal/numberparsing_tables.h */ +/* begin file include/simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file include/simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_IMPLEMENTATION_BASE_H +/* end file include/simdjson/implementation-base.h */ + +// +// First, figure out which implementations can be run. 
Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. 
+#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#endif + +// Default Westmere to on if this is x86-64. Note that the macro SIMDJSON_REQUIRES_HASWELL appears unused. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#define SIMDJSON_IMPLEMENTATION_WESTMERE (SIMDJSON_IS_X86_64 && !SIMDJSON_REQUIRES_HASWELL) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 // (!SIMDJSON_CAN_ALWAYS_RUN_ARM64 && !SIMDJSON_CAN_ALWAYS_RUN_HASWELL && !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE && !SIMDJSON_CAN_ALWAYS_RUN_PPC64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_UNDESIRED_WARNINGS + +// Implementations +/* begin file include/simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + + +#if SIMDJSON_IMPLEMENTATION_ARM64 + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { +} // namespace arm64 +} // namespace simdjson + +/* begin file include/simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + + +namespace simdjson { +namespace arm64 { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file include/simdjson/arm64/implementation.h */ + +/* begin file include/simdjson/arm64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "arm64" +// #define SIMDJSON_IMPLEMENTATION arm64 +/* end file include/simdjson/arm64/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = 
default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +// This should be the correct header whether +// you use visual studio or other compilers. 
+#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file include/simdjson/arm64/intrinsics.h */ +/* begin file include/simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. 
However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file include/simdjson/arm64/bitmanipulation.h */ +/* begin file include/simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file include/simdjson/arm64/bitmask.h */ +/* begin file include/simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +#include + + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. + * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. 
+ */ +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so end ups generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so end ups generating worse code. 
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = 
static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
+ simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, 
v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead 
of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. 
+ template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + 
simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. 
+#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) 
{ + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t 
bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file include/simdjson/arm64/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return 
register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned.
+// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + 
+simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/arm64/stringparsing.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_H +#define SIMDJSON_ARM64_STRINGPARSING_H + + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. 
+ uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_H +/* end file include/simdjson/arm64/stringparsing.h */ +/* begin file include/simdjson/arm64/numberparsing.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_H +#define SIMDJSON_ARM64_NUMBERPARSING_H + +namespace simdjson { +namespace arm64 { +namespace { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace arm64 { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), 
(WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases; when it does not work, it +// returns false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows.
+ // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are falling back on the slower path. + + // In the slow path, we need to adjust i so that it is >= 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word.
+ // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here we have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because 1 <= -real_exponent + 1 < 64 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just.
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11} or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool 
parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
+ // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
+ // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. 
+ error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. 
+ if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 
19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. 
+// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. 
+ // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. 
+ if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? 
(~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_ARM64_NUMBERPARSING_H +/* end file include/simdjson/arm64/numberparsing.h */ +/* begin file include/simdjson/arm64/end.h */ +/* end file include/simdjson/arm64/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 + +#endif // SIMDJSON_ARM64_H +/* end file include/simdjson/arm64.h */ +/* begin file include/simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H + + +#if SIMDJSON_IMPLEMENTATION_FALLBACK + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). 
+ */ +namespace fallback { +} // namespace fallback +} // namespace simdjson + +/* begin file include/simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + + +namespace simdjson { +namespace fallback { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file include/simdjson/fallback/implementation.h */ + +/* begin file include/simdjson/fallback/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "fallback" +// #define SIMDJSON_IMPLEMENTATION fallback +/* end file include/simdjson/fallback/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace fallback { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +namespace fallback { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = 
default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace fallback +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +#include + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? 
bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file include/simdjson/fallback/bitmanipulation.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = 
internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. 
+// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + 
+simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/fallback/stringparsing.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_H +#define SIMDJSON_FALLBACK_STRINGPARSING_H + + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_H +/* end file include/simdjson/fallback/stringparsing.h */ +/* begin file include/simdjson/fallback/numberparsing.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_H + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace { +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace fallback { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive 
integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 
1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. 
Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. 
+ const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. 
Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). 
+ int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. 
We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11} or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an inconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } 
+simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_H +/* end file include/simdjson/fallback/numberparsing.h */ +/* begin file include/simdjson/fallback/end.h */ +/* end file include/simdjson/fallback/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_FALLBACK +#endif // SIMDJSON_FALLBACK_H +/* end file include/simdjson/fallback.h */ +/* begin file include/simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + + +#if SIMDJSON_IMPLEMENTATION_ICELAKE + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_TARGET_ICELAKE +#define SIMDJSON_UNTARGET_ICELAKE +#else +#define SIMDJSON_TARGET_ICELAKE SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt") +#define SIMDJSON_UNTARGET_ICELAKE SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). 
+ */ +namespace icelake { +} // namespace icelake +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_ICELAKE +// +/* begin file include/simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +using namespace simdjson; + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file include/simdjson/icelake/implementation.h */ +/* begin file include/simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + + +#ifdef SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers 
directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file include/simdjson/icelake/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/icelake/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "icelake" +// #define SIMDJSON_IMPLEMENTATION icelake +SIMDJSON_TARGET_ICELAKE +/* end file include/simdjson/icelake/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +namespace icelake { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = 
default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file include/simdjson/icelake/bitmanipulation.h */ +/* begin file include/simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file include/simdjson/icelake/bitmask.h */ +/* begin file include/simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + + + + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + 
(uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
+ template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { +#if SIMDJSON_GCC8 + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); +#else + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), 16 - N); +#endif + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T 
v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, 
v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, 
uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, 
other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return 
simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file include/simdjson/icelake/simd.h */ +/* begin file 
include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + 
return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // filled by the memcpy below; we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + // XOR of the two 32-bit words: the result is zero exactly when the four bytes at src match the four bytes at atom. + return srcval ^ string_to_uint32(atom); +} + +// True when the four bytes at src are "true" and is_not_structural_or_whitespace(src[4]) is zero, +// i.e. the literal is followed by a structural or whitespace character. Always reads src[4]. +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +// Bounded variant: with len > 4 it defers to the check above (which reads src[4]); with len == 4 it +// compares only the four literal bytes and does not read src[4]; with len < 4 it returns false. +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +// True when the four bytes at src+1 are "alse" and is_not_structural_or_whitespace(src[5]) is zero. +// src[0] is not examined here. NOTE(review): presumably the caller has already matched the leading 'f' - confirm. +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +// Bounded variant: with len > 5 it defers to the check above (which reads src[5]); with len == 5 it +// compares only the four bytes at src+1 and does not read src[5]; with len < 5 it returns false. +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +// True when the four bytes at src are "null" and is_not_structural_or_whitespace(src[4]) is zero, +// i.e. the literal is followed by a structural or whitespace character. Always reads src[4]. +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +// Bounded variant: with len > 4 it defers to the check above (which reads src[4]); with len == 4 it +// compares only the four literal bytes and does not read src[4]; with len < 4 it returns false. +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/icelake/stringparsing.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_H +#define SIMDJSON_ICELAKE_STRINGPARSING_H + + +namespace simdjson { +namespace icelake { 
+namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_H +/* end file include/simdjson/icelake/stringparsing.h */ +/* begin file include/simdjson/icelake/numberparsing.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_H + +namespace simdjson { +namespace icelake { +namespace { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace icelake { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) 
(WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). 
+ // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. 
+ simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. 
+ // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. 
+ if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool 
parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. 
+ // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. 
+ // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. 
+ error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. 
+ if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 
19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. 
+// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. 
+ // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? 
INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. 
+ if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? 
(~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_H +/* end file include/simdjson/icelake/numberparsing.h */ +/* begin file include/simdjson/icelake/end.h */ +SIMDJSON_UNTARGET_ICELAKE +/* end file include/simdjson/icelake/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_ICELAKE +#endif // SIMDJSON_ICELAKE_H +/* end file include/simdjson/icelake.h */ +/* begin file include/simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + + +#if SIMDJSON_IMPLEMENTATION_HASWELL + +#if SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_TARGET_HASWELL +#define SIMDJSON_UNTARGET_HASWELL +#else +#define SIMDJSON_TARGET_HASWELL SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt") +#define SIMDJSON_UNTARGET_HASWELL SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). 
+ */ +namespace haswell { +} // namespace haswell +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_HASWELL +// +/* begin file include/simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +using namespace simdjson; + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file include/simdjson/haswell/implementation.h */ +/* begin file include/simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + + +#ifdef SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file include/simdjson/haswell/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/haswell/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "haswell" +// #define SIMDJSON_IMPLEMENTATION haswell +SIMDJSON_TARGET_HASWELL +/* end file include/simdjson/haswell/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = 
default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. 
+SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file include/simdjson/haswell/bitmanipulation.h */ +/* begin file include/simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file include/simdjson/haswell/bitmask.h */ +/* begin file include/simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = 
static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, 
v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... 
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, 
int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t 
v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline 
bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 
reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file include/simdjson/haswell/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = 
internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. 
+// The memcpy does not make this function costly: optimizing compilers
+// (GCC, clang, Visual Studio) fold string_to_uint32("false") into the
+// compile-time constant 1936482662.
+simdjson_inline uint32_t string_to_uint32(const char* str) {
+  uint32_t packed;
+  std::memcpy(&packed, str, sizeof(packed));
+  return packed;
+}
+
+
+// str4ncmp likewise loads its four source bytes through memcpy so that no
+// unaligned 32-bit read (undefined in C/C++) is ever performed; compilers
+// lower the copy to roughly one instruction.
+// Returns zero exactly when the first four bytes of src equal the first four
+// bytes of atom.
+simdjson_warn_unused
+simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) {
+  static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes");
+  uint32_t head;
+  std::memcpy(&head, src, sizeof(head));
+  return head ^ string_to_uint32(atom);
+}
+
+// True when src starts with "true" and src[4] is a structural or whitespace
+// character. Always reads src[4].
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "true");
+  const uint32_t trailer = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | trailer) == 0;
+}
+
+// Length-aware variant: the byte after the atom is only inspected when len
+// says it exists.
+simdjson_warn_unused
+simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) {
+  if (len < 4) { return false; }
+  if (len == 4) { return str4ncmp(src, "true") == 0; }
+  return is_valid_true_atom(src);
+}
+
+// True when src[1..4] is "alse" and src[5] is a structural or whitespace
+// character. src[0] is not examined here. Always reads src[5].
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src+1, "alse");
+  const uint32_t trailer = jsoncharutils::is_not_structural_or_whitespace(src[5]);
+  return (mismatch | trailer) == 0;
+}
+
+// Length-aware variant: the byte after the atom is only inspected when len
+// says it exists.
+simdjson_warn_unused
+simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) {
+  if (len < 5) { return false; }
+  if (len == 5) { return str4ncmp(src+1, "alse") == 0; }
+  return is_valid_false_atom(src);
+}
+
+// True when src starts with "null" and src[4] is a structural or whitespace
+// character. Always reads src[4].
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src) {
+  const uint32_t mismatch = str4ncmp(src, "null");
+  const uint32_t trailer = jsoncharutils::is_not_structural_or_whitespace(src[4]);
+  return (mismatch | trailer) == 0;
+}
+
+// Length-aware variant: with more than four bytes available, defer to the
+// overload that also checks src[4]; with exactly four, compare the bytes only.
+simdjson_warn_unused
+simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) {
+  if (len > 4) { return is_valid_null_atom(src); }
+  else if (len == 4) { return !str4ncmp(src, "null"); }
+  else { return false; }
+}
+
+} // namespace atomparsing
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+/* end file include/simdjson/generic/atomparsing.h */
+/* begin file include/simdjson/haswell/stringparsing.h */
+#ifndef SIMDJSON_HASWELL_STRINGPARSING_H
+#define SIMDJSON_HASWELL_STRINGPARSING_H
+
+
+namespace simdjson {
+namespace haswell {
+namespace {
+
+using namespace simd;
+
+// Holds backslashes and quotes locations for one BYTES_PROCESSED-byte block.
+// Bit k of each mask corresponds to byte k of the block.
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 32;
+  // Copies BYTES_PROCESSED bytes from src to dst and records where '\\' and
+  // '"' occur in them.
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  // True when some quote bit lies below the lowest backslash bit. With no
+  // backslash, bs_bits - 1 wraps to all ones, so any quote qualifies.
+  simdjson_inline bool has_quote_first() const { return ((bs_bits - 1) & quote_bits) != 0; }
+  // True when some backslash bit lies below the lowest quote bit. With no
+  // quote, quote_bits - 1 wraps to all ones, so any backslash qualifies.
+  simdjson_inline bool has_backslash() const { return ((quote_bits - 1) & bs_bits) != 0; }
+  // Result of trailing_zeroes on the quote mask, i.e. the offset of the
+  // first quote. NOTE(review): the result for an empty mask depends on
+  // trailing_zeroes; only call after has_quote_first().
+  simdjson_inline int quote_index() const { return trailing_zeroes(quote_bits); }
+  // Result of trailing_zeroes on the backslash mask; same caveat as above.
+  simdjson_inline int backslash_index() const { return trailing_zeroes(bs_bits); }
+
+  uint32_t bs_bits;
+  uint32_t quote_bits;
+}; // struct backslash_and_quote
+
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // this can read up to BYTES_PROCESSED - 1 (31) bytes beyond the buffer size,
+  // but we require SIMDJSON_PADDING of padding
+  static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");
+  simd8<uint8_t> v(src);
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  v.store(dst);
+  return {
+    static_cast<uint32_t>((v == '\\').to_bitmask()), // bs_bits
+    static_cast<uint32_t>((v == '"').to_bitmask()), // quote_bits
+  };
+}
+
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+
+#endif // SIMDJSON_HASWELL_STRINGPARSING_H
+/* end file 
include/simdjson/haswell/stringparsing.h */ +/* begin file include/simdjson/haswell/numberparsing.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_H +#define SIMDJSON_HASWELL_NUMBERPARSING_H + +namespace simdjson { +namespace haswell { +namespace { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace haswell { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), 
(WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. 
This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). 
+ // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. 
In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } 
+simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if the remaining significant digits still overflow + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = p-start_digits > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ?
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_H +/* end file include/simdjson/haswell/numberparsing.h */ +/* begin file include/simdjson/haswell/end.h */ +SIMDJSON_UNTARGET_HASWELL +/* end file include/simdjson/haswell/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_HASWELL +#endif // SIMDJSON_HASWELL_COMMON_H +/* end file include/simdjson/haswell.h */ +/* begin file include/simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + + +#if SIMDJSON_IMPLEMENTATION_PPC64 + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { +} // namespace ppc64 +} // namespace simdjson + +/* begin file include/simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + + +namespace simdjson { +namespace ppc64 { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} // namespace + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file include/simdjson/ppc64/implementation.h */ + +/* begin file include/simdjson/ppc64/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "ppc64" +// #define SIMDJSON_IMPLEMENTATION ppc64 +/* end file include/simdjson/ppc64/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = 
default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file include/simdjson/ppc64/intrinsics.h */ +/* begin file include/simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file include/simdjson/ppc64/bitmanipulation.h */ +/* begin file include/simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+ // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file include/simdjson/ppc64/bitmask.h */ +/* begin file include/simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child 
&operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +// Forward-declared so they can be used by splat and friends. +template struct simd8; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { 
+ return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a 
bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed 
char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, 
(__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (> guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->lt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return
!bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = 
this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_H +/* end file include/simdjson/ppc64/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 =
internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned.
+// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + 
+simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/ppc64/stringparsing.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_H +#define SIMDJSON_PPC64_STRINGPARSING_H + + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. 
+ uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_H +/* end file include/simdjson/ppc64/stringparsing.h */ +/* begin file include/simdjson/ppc64/numberparsing.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_H +#define SIMDJSON_PPC64_NUMBERPARSING_H + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +static simdjson_inline uint32_t +parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace ppc64 { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) 
(found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. 
+ if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. 
+ + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. 
In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } 
+simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_PPC64_NUMBERPARSING_H +/* end file include/simdjson/ppc64/numberparsing.h */ +/* begin file include/simdjson/ppc64/end.h */ +/* end file include/simdjson/ppc64/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_PPC64 + +#endif // SIMDJSON_PPC64_H +/* end file include/simdjson/ppc64.h */ +/* begin file include/simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + + +#if SIMDJSON_IMPLEMENTATION_WESTMERE + +#if SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_TARGET_WESTMERE +#define SIMDJSON_UNTARGET_WESTMERE +#else +#define SIMDJSON_TARGET_WESTMERE SIMDJSON_TARGET_REGION("sse4.2,pclmul") +#define SIMDJSON_UNTARGET_WESTMERE SIMDJSON_UNTARGET_REGION +#endif + +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { +} // namespace westmere +} // namespace simdjson + +// +// These two need to be included outside SIMDJSON_TARGET_WESTMERE +// +/* begin file include/simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +namespace { +using namespace simdjson; +using namespace simdjson::dom; +} + +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file include/simdjson/westmere/implementation.h */ +/* begin file include/simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +#ifdef SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#ifdef SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file include/simdjson/westmere/intrinsics.h */ + +// +// The rest need to be inside the region +// +/* begin file include/simdjson/westmere/begin.h */ +// redefining SIMDJSON_IMPLEMENTATION to "westmere" +// #define SIMDJSON_IMPLEMENTATION westmere +SIMDJSON_TARGET_WESTMERE +/* end file include/simdjson/westmere/begin.h */ + +// Declarations +/* begin file include/simdjson/generic/dom_parser_implementation.h */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. +struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + 
simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) 
bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/dom_parser_implementation.h */ +/* begin file include/simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file include/simdjson/westmere/bitmanipulation.h */ +/* begin file include/simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. 
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file include/simdjson/westmere/bitmask.h */ +/* begin file include/simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. 
+ template + struct simd8; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return 
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const 
simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 
other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline 
uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file include/simdjson/westmere/simd.h */ +/* begin file include/simdjson/generic/jsoncharutils.h */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = 
internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. 
+ return 0; // bad r +} + +#ifdef SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +using internal::value128; + +simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { + value128 answer; +#if defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // defined(SIMDJSON_REGULAR_VISUAL_STUDIO) || defined(SIMDJSON_IS_32BITS) + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/jsoncharutils.h */ +/* begin file include/simdjson/generic/atomparsing.h */ +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. 
+// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + 
+simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/atomparsing.h */ +/* begin file include/simdjson/westmere/stringparsing.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_H +#define SIMDJSON_WESTMERE_STRINGPARSING_H + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_H +/* end file 
include/simdjson/westmere/stringparsing.h */ +/* begin file include/simdjson/westmere/numberparsing.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_H + +namespace simdjson { +namespace westmere { +namespace { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +/* begin file include/simdjson/generic/numberparsing.h */ +#include + +namespace simdjson { +namespace westmere { + +namespace ondemand { +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; +} + +namespace { +/// @private +namespace numberparsing { + + + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), 
(SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} +} +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. +// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) { +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) { +#endif + // convert the integer into a double. 
This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). 
+ // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. + // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) 
+ // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. 
In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? 
-0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. + // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. 
+ // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. 
+ + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. 
+ const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. + if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. 
+ + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' 
+ return digit_count - size_t(start - start_digits); +} + +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! 
+ if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } 
+simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. +template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' 
== *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. 
+ // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. +// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. 
+ if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. 
+ // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. 
+ if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. 
+ if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. 
+ // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return ondemand::number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return ondemand::number_type::unsigned_integer; + } + } + return ondemand::number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return ondemand::number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. 
+ // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 
0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} +} //namespace {} +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson +/* end file include/simdjson/generic/numberparsing.h */ + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_H +/* end file include/simdjson/westmere/numberparsing.h */ +/* begin file include/simdjson/westmere/end.h */ +SIMDJSON_UNTARGET_WESTMERE +/* end file include/simdjson/westmere/end.h */ + +#endif // SIMDJSON_IMPLEMENTATION_WESTMERE +#endif // SIMDJSON_WESTMERE_COMMON_H +/* end file include/simdjson/westmere.h */ + +// Builtin implementation + +SIMDJSON_POP_DISABLE_WARNINGS + +#endif // SIMDJSON_IMPLEMENTATIONS_H +/* end file include/simdjson/implementations.h */ + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
+#endif +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +// redefining SIMDJSON_IMPLEMENTATION to "SIMDJSON_BUILTIN_IMPLEMENTATION" +// #define SIMDJSON_IMPLEMENTATION SIMDJSON_BUILTIN_IMPLEMENTATION + +// ondemand is only compiled as part of the builtin implementation at present + +// Interface declarations +/* begin file include/simdjson/generic/implementation_simdjson_result_base.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. 
+ */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. 
+ */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/implementation_simdjson_result_base.h */ +/* begin file include/simdjson/generic/ondemand.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). */ +using depth_t = int32_t; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +/* begin file include/simdjson/generic/ondemand/json_type.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +class value_iterator; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. 
+ */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. 
They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_type.h */ +/* begin file include/simdjson/generic/ondemand/token_position.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_position.h */ +/* begin file include/simdjson/generic/ondemand/logger.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class json_iterator; +class value_iterator; + +namespace logger { + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). 
+static inline void log_headers() noexcept; +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const 
char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/logger.h */ +/* begin file include/simdjson/generic/ondemand/raw_json_string.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class object; +class parser; +class json_iterator; + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. 
+ * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings.
+ * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. 
+ */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter) const noexcept; + + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. 
+ */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/raw_json_string.h */ +/* begin file include/simdjson/generic/ondemand/token_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. 
+ */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_iterator.h */ +/* begin file include/simdjson/generic/ondemand/json_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class document; +class document_stream; +class object; +class array; +class value; +class raw_json_string; +class parser; + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. 
+ */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. + * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the token position of the root (returns a token_position, not a value iterator) + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds.
+ */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document.
+ * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + template simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. + * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. 
+ */ + simdjson_inline simdjson_result unescape(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. 
+ simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept; + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_iterator.h */ +/* begin file include/simdjson/generic/ondemand/value_iterator.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class document; +class object; +class array; +class value; +class raw_json_string; +class parser; + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. 
+ */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. 
+ * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. 
+ */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. 
+ * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an array; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete).
+ */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. 
+ */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string() noexcept; 
+ simdjson_warn_unused simdjson_inline simdjson_result get_root_bool() noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number() noexcept; + simdjson_inline bool is_root_null() noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. 
*/ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. 
+ */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value_iterator.h */ +/* begin file include/simdjson/generic/ondemand/array_iterator.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class 
value; +class document; + +/** + * A forward-only iterator over the elements of a JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. */ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface.
+ */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array_iterator.h */ +/* begin file include/simdjson/generic/ondemand/object_iterator.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class field; + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end.
true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for '}' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for '}' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object_iterator.h */ +/* begin file include/simdjson/generic/ondemand/array.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class value; +class document; + +/** + * A forward-only JSON array.
+ */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code.
Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document.
+ */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty.
Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array.h */ +/* begin file include/simdjson/generic/ondemand/document.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class parser; +class array; +class object; +class value; +class raw_json_string; +class array_iterator; +class document_stream; + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document.
+ * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double.
+ * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null.
+ * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value.
+ */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns A UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g.
\\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value. + * @exception simdjson_error if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is rewound to its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method.
+ */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. 
You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. 
+ * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number.
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. 
After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. 
+ */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. + */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator 
document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace 
ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_from_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline 
operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline 
error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & 
noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document.h */ +/* begin file include/simdjson/generic/ondemand/value.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class document; +class field; +class object; +class raw_json_string; + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. 
+ */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string() noexcept; + + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). 
If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string.
+ */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. 
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires partially parsing the number string.
If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater than or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value; the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type.
+ * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". 
+ * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. 
+ * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator 
SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. 
+ * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value.h */ +/* begin file include/simdjson/generic/ondemand/field.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key() noexcept; + /** + * Get the key as a raw_json_string. 
Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. + */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key() noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/field.h */ +/* begin file include/simdjson/generic/ondemand/object.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. 
+ * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. 
+ * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. 
+ */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + +}; + +} // 
namespace simdjson +/* end file include/simdjson/generic/ondemand/object.h */ +/* begin file include/simdjson/generic/ondemand/parser.h */ + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class array; +class object; +class value; +class raw_json_string; +class document_stream; + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. 
+ * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call iterate() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document.
+ */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ +simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; +/** @overload simdjson_result iterate(padded_string_view json) & noexcept */ +simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. 
The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.)
+ * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to DEFAULT_BATCH_SIZE (1MB), which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails.
You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload iterate_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired.
+ */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param in input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. 
+ * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst) const noexcept; +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/parser.h */ +/* begin file include/simdjson/generic/ondemand/document_stream.h */ +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +class parser; +class json_iterator; +class document; + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. 
You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated, possibly because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position.
+ * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. 
*/ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document_stream.h */ +/* begin file include/simdjson/generic/ondemand/serialization.h */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. 
+ */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param x The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The document. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw.
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand +/* end file include/simdjson/generic/ondemand/serialization.h */ +/* end file include/simdjson/generic/ondemand.h */ + +// Inline definitions +/* begin file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& 
implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson +/* end file include/simdjson/generic/implementation_simdjson_result_base-inl.h */ +/* begin file include/simdjson/generic/ondemand-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_type-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << 
"unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void 
number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_type-inl.h */ +/* begin file include/simdjson/generic/ondemand/logger-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); 
+} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + // Technically a static variable is not thread-safe, but if you are using threads + // and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if(!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf("# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) 
indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN+2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN+2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN+2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5+2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } +} + +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail); +} +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail) noexcept { + if (LOG_ENABLED) { + const int indent = depth*2; + const auto buf = iter.token.buf; + printf("| %*s%s%-*s ", + indent, "", + title_prefix, + LOG_EVENT_LEN - 
indent - int(strlen(title_prefix)), title + ); + { + // Print the current structural. + printf("| "); + auto current_structural = &buf[*index]; + for (int i=0;i(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. 
+ if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. 
+ if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter) const noexcept { + return iter.unescape(*this); +} + + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline 
simdjson_warn_unused simdjson_result simdjson_result::unescape(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape(iter); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/raw_json_string-inl.h */ +/* begin file include/simdjson/generic/ondemand/token_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} 
+simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/token_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/json_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser 
= nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." 
+ // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t 
*json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(0); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. + return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. 
+ uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in) noexcept { + return parser->unescape(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = 
child_depth; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + +template +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t (&tmpbuf)[N]) noexcept { + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Truncate whitespace to fit the buffer. + if (max_len > N-1) { + // if (jsoncharutils::is_not_structural_or_whitespace(json[N-1])) { return false; } + max_len = N-1; + } + + // Copy to the buffer. 
+ std::memcpy(tmpbuf, json, max_len); + tmpbuf[max_len] = ' '; + return true; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/json_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/value_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last 
structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. 
+ switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). 
+ if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. 
At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). 
+ if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). 
+ if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. 
+ // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. 
+ error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + 
_json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string() noexcept { + return get_raw_json_string().unescape(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result 
value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + 
if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. 
+ if((answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type() noexcept { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + auto answer = numberparsing::get_number_type(tmpbuf); + if((answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. 
+ uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string() noexcept { + return get_string(); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string() noexcept { + return get_raw_json_string(); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1]; // <20 digits> is the longest possible unsigned integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + 
advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1]; // -<19 digits> is the longest possible integer + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. 
+ uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool() noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1]; + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (!_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline bool value_iterator::is_root_null() noexcept { + // If there is trailing content, then the document is not null. 
+ if (!_json_iter->is_single_token()) { return false; } + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(result) { advance_root_scalar("null"); } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused 
simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. 
+ const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! + */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return 
INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). + auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() 
const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline 
simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array_iterator &&value +) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : SIMDJSON_BUILTIN_IMPLEMENTATION::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/object_iterator-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { 
iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. 
+// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. 
+// + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for '}' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object_iterator-inl.h */ +/* begin file include/simdjson/generic/ondemand/array-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `[`. +// In this state, at_start == true. 
+// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. 
+// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. + iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. 
If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} 
+simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/array-inl.h */ +/* begin file include/simdjson/generic/ondemand/document-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() noexcept { + return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. 
+ iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': + case '{': + return value(get_root_value_iterator()); + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + // return value(get_root_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(); +} +simdjson_inline simdjson_result document::get_string() noexcept { + return get_root_value_iterator().get_root_string(); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(); +} + +template<> simdjson_inline 
simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator object() & 
noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. 
*/ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. 
+ const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &&value +) noexcept : + 
implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return 
first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + 
return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } 
+ return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline 
simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_uint64(); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_int64(); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_double(); } +simdjson_inline simdjson_result document_reference::get_string() noexcept { return doc->get_string(); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_raw_json_string(); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_bool(); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->is_null(); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return uint64_t(*doc); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return int64_t(*doc); } +simdjson_inline document_reference::operator double() noexcept(false) { return double(*doc); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return bool(*doc); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline 
simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->is_integer(); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { 
return doc->get_number_type(); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_number(); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const 
char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return 
first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline 
simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/document-inl.h */ +/* begin file include/simdjson/generic/ondemand/value-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} 
+simdjson_inline simdjson_result value::get_string() noexcept { + return iter.get_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + 
return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). 
+ iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return 
first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} 
+simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string() noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + 
if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result 
simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/value-inl.h */ +/* begin file include/simdjson/generic/ondemand/field-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key() noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. 
+ simdjson_result answer = first.unescape(second.iter.json_iter()); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. + return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key() noexcept { + if (error()) { return error(); } + return first.unescaped_key(); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/field-inl.h */ +/* begin file include/simdjson/generic/ondemand/object-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( 
iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { return NO_SUCH_FIELD; } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. 
+ */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. + if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek(0)}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. 
+ size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && 
noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/object-inl.h */ +/* begin file include/simdjson/generic/ondemand/parser-inl.h */ +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = 
new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + 
+simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size) noexcept { + return iterate_many(s.data(), s.length(), batch_size); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), 
end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson +/* end file include/simdjson/generic/ondemand/parser-inl.h */ +/* begin file include/simdjson/generic/ondemand/document_stream-inl.h */ +#include +#include +#include +namespace simdjson { +namespace SIMDJSON_BUILTIN_IMPLEMENTATION { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. + std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. 
+ cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. 
+ // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. 
at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. 
+ */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. + doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any 
trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace SIMDJSON_BUILTIN_IMPLEMENTATION +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} +/* end file include/simdjson/generic/ondemand/document_stream-inl.h */ +/* begin file include/simdjson/generic/ondemand/serialization-inl.h */ + + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. 
+ if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. + */ + using namespace SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } 
+ return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace SIMDJSON_BUILTIN_IMPLEMENTATION { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw 
simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, 
simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand +/* end file include/simdjson/generic/ondemand/serialization-inl.h */ +/* end file include/simdjson/generic/ondemand-inl.h */ + + +namespace simdjson { + /** + * Represents the best statically linked simdjson implementation that can be used by the compiling + * program. + * + * Detects what options the program is compiled against, and picks the minimum implementation that + * will work on any computer that can run the program. For example, if you compile with g++ + * -march=westmere, it will pick the westmere implementation. The haswell implementation will + * still be available, and can be selected at runtime, but the builtin implementation (and any + * code that uses it) will use westmere. + */ + namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; + /** + * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand + */ + namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; + /** + * Function which returns a pointer to an implementation matching the "builtin" implementation. + * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling + * program. 
If you compile with g++ -march=haswell, this will return the haswell implementation. + * It is handy to be able to check what builtin was used: builtin_implementation()->name(). + */ + const implementation * builtin_implementation(); +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_H +/* end file include/simdjson/builtin.h */ + +#endif // SIMDJSON_H +/* end file include/simdjson.h */ diff --git a/kram-profile/README.md b/kram-profile/README.md new file mode 100644 index 00000000..6e7bfea3 --- /dev/null +++ b/kram-profile/README.md @@ -0,0 +1,290 @@ +kram-profile +========== + +kram-profile wraps SwiftUI atop a WKWebView running the Perfetto TraceViewer. A dev can open directories or files of traces. Supported files are added to a list to quickly view these in Perfetto. The app is multi-document. Each window is a single instance of Perfetto TraceViewer that is loaded once. The sandboxed SwiftUI acts as the bridge to the native file system, which the TraceViewer browser sandbox lacks. + +Flamegraphs are key to all profiling. Why look at a giant table of numbers when you can see them visually? Flamegraphs also need to be dynamic and display hover tips and details. Fortunately there are several tools now supporting flamegraphs. Perfetto is one such tool. + +kram-profile fixes up build traces to reflect the name of the file/function. And it demangles function names from clang. + +Files can be dragged onto the list view, double-clicked from Finder if the filenames below are associated with the app, or there is an Open and Refresh command. + +Supported files + +* .memtrace - memory report generated by the Kram scripts folder. +* .trace/.perftrace - performance timings in the form of Catapult trace json files +* .json/.buildtrace - clang timing output generated using -ftime-trace +* .zip archives of above +* .gzip compressed files of above +* folders of loose files or archives + +There is a pre-built version of kram-profile for macOS 13.0 and higher.
+ +List view + File type, name, duration + Up/down arrow keys or cmd+N/cmd+shift+N to advance through list + Hover for path of filename + Can collapse/restore the list + Type-search in the list + +Navigation Title + Filename (archive) + Info button (memtrace) - shows max of tracks for heap size + cmd+T search by duration + cmd+shift+T search by name + +WKWebView + Perfetto Flamegraph + Tracknames on left + cmd+S to search for a named entry in flamegraph + cmd+shift+P to parse command + Cannot hide the tracknames + +---------------- + +TODO: (x are done) +* x Fix document support, so can double click and have app open files. readFromURL like kramv. +* x Support binary Perfetto traces. Test with Google sample code. +* x Fixup "Source" tags in clang json to use filename (no extension) from detail field +* x Find start/end time of each json file. +* x Support gzip trace files +* x Add sort by range (useful for mem/build traces) +* x Add zip archive support, can drop archive of 1+ traces +* x Tie in with the excellent ClangBuildAnalyzer tool + +* Add frame type for perf traces for vsync ticker (binary format prob has it) +* Scale specific traces to a single duration. That way the next file comes in at that scale. +* Move away from Catapult json to own binary format. Can then translate to json or use the Perfetto SDK to convert to protobufs. + +---------------- + +# Profilers + +Cpu Profilers.
See for more details + +* Catapult - see below +* Perfetto - see below +* Flutter (using Perfetto) https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview#heading=h.yr4qxyxotyw +* Optick - https://github.com/bombomby/optick +* Tracy - https://github.com/wolfpld/tracy + +* ClangBuildAnalyzer - https://github.com/aras-p/ClangBuildAnalyzer +* Microprofile +* Microprofile 2 +* Microprofiler +* EasyProfiler +* VerySleepy +* LukeStackwalker +* Remotery +* geiger +* Palanteer +* Intel IACA +* Coz +* heaptrack +* hotspot +* dprofiler +* spall + +* Commercial +* Telemetry - http://www.radgametools.com/telemetry.htm +* Superluminal - higher-rate sampling profiler +* Xcode Instruments - see Xcode +* AMD Code Analyst - see Xcode +* Intel Vtune - + +Gpu Profilers. See for more details + +* Xcode Gpu Capture +* Android Gpu Inspector - https://developer.android.com/agi +* Nvidia NSight +* Mali Shader Compiler +* Pix Profiler + +Catapult +--------- + +This was the tracing system that Perfetto replaced. Originally designed for Chrome profiling. Flamegraph and track-based. It also had a nice json API for recording thread names and profile scopes. + +Perfetto +--------- +* https://ui.perfetto.dev +* https://perfetto.dev/docs/visualization/deep-linking-to-perfetto-ui + +This is a web-based profiling and flame-graph tool. It's fast on desktop, and continues to evolve. Only has second and timecode granularity which isn't enough. For example, performance profiling for games is in milliseconds. The team is mostly focused on Chrome profiling which apparently is in seconds. But the visuals are nice, and it now has hover tips with size/name, and also has an Issues list that the devs are responsive to. Flutter is using this profiler, and kram-profile does too. + +Perfetto lives inside a sandbox due to the browser, so feeding files to Perfetto is one weakness.
As a result kram-profile's file list is a nice complement, and can send the file data across via Javascript. This is not unlike an Electron wrapper, but in much less memory. + +One limitation is that traces must be nested. So timestamps cannot overlap. Make sure to honor this, or traces will overlap vertically and become confused. There is a C++ SDK to help with writing out traces, and that is a much more compact format than the json. But more languages can write to the json format. The Perfetto team is doing no further work on the json format. And fields like "color" are unsupported, and Perfetto uses its own coloration for blocks instead. This coloration is nice and consistent and tied to name. + +Having lots of issues trying to reuse the Perfetto web page to load more than one profile. The web app gets into a bad state, and then won't load any content afterwards. + +Orbit +--------- +* https://orbitprofiler.com/ + +This profiler uses dynamic instrumentation of code via dtrace and trampolines. Note that Win, macOS can use this sort of system. Apple blocks access to dtrace on iOS, but there are mentions of ktrace. So you inject/remove traces dynamically by patching the dll sources directly. This used to run on macOS, Win, and Linux. Google Stadia adopted this project, and now it is limited to Linux support. + +This avoids the need to blindly instrument code or inject scopes into high-frequency routines. But this patching is not compatible with the security theater adopted by iOS devices. + +ClangBuildAnalyzer +-------- +* https://github.com/aras-p/ClangBuildAnalyzer + +A nice build profile aggregator. Runs through the json timings that Clang generates, and details which headers and templates and optimization are slowing down builds. Then go back and review the json files to validate the results. Uses hierarchical and not self time, so the timings do overlap. And timings across threads total up to more timing than the overall build takes.
+ +Has an incremental system to snapshot and compare modestamps, and only do work on newer files. This is some great open-source. Aras optimized Unity builds with this, and that's a huge codebase. I've used this to optimize kram. + +Include What You Use +--------- +* https://github.com/include-what-you-use/include-what-you-use + +Automate the tedium of finding the minimal set of headers for C/C++ with this utility. A third party added ObjC support, but it hasn't landed. Seems like on large projects the includes get out of hand. So I look forward to trying this out. + +Rewrites the headers, but there are ways to keep it from removing some. Unclear how this works with cross-platform code. But maybe it only strips includes within the defines that it sees. Send the CXXFLAGS used for the build to the exe along with a source file. + +# Use Cases + +Memory profiling +--------- + +VMA can dump a json file, and that can be converted using scripts/GpuMemDumpPerfetto.py. Then open this in kram-profile to see current memory fragmentation and layout across the various Vulkan heaps. VMA can generate a png, but it's static. Perfetto can allow one to zoom in and see the actual names of blocks and size. + +Set the Perfetto timestamp to seconds, and then 1s = 1MB. This allows reading the timings as megabytes. A good timescale is 64s (64MB). + +Performance profiling +--------- + +Have app write out time and duration events using the Catapult json format. Then open these in kram-profile to optimize an application. A good timescale is 0.1s for games. Can then see where app performance is lost across threads and job systems. It is harder to measure async wait gaps, since these are not nested properly. Also good to instrument sleeps. Not sure how to scope fibers, since these get swapped out. There are events which aren't duration based, so use those. + +Build profiling +--------- + +Clang supports -ftime-trace across all platforms.
Set that to dump the Perfetto trace files into the build directories alongside the .o files. Then use kram-profile to open these folders. Also see scripts/cba.sh to run ClangBuildAnalyzer on these folders to identify where build timings are slow. Then address with optimizing includes and using pch where possible. A good timescale is 1s. Files that take longer than this to build should be targeted. + +Simd libraries, and especially files like STL with heavy template generation will often be at the top of the list. PCH will reduce parsing time for templates, but not the instantiation. See the Optimization section for more details. + +Ideally run the traces, run CBA, reduce headers and identify pch candidates. Then repeat, until overall timings go down. Remember that PCH is per link, so one per DLL or app. It also breaks isolation of headers in files, so may want a CI build not using it to catch unspecified headers. + +Ninja Build +--------- + +This is a minimal version of Make. But code must generate the Ninja file. Cmake is one generator, and GN is the primary generator. But Ninja is so simple that it's fairly easy to specify directly. I'm experimenting with this in the hlslparser, where I wrote the Ninja files manually just to work with the syntax. + +# Optimization + +Unity builds +----------- + +Not to be confused with the Unity game engine. But unity builds combine several .cpp files into a single .cpp. This works around problems with slow linkers, and multiple template and inline code instantiations. But code and macros from one .cpp spill into the next. To avoid this, be careful about undeffing at the bottoms of files. kram also uses a common namespace across headers and source files. This allows "using namespace" in both, and keeps the namespaces compartmentalized. + +Precompiled headers (PCH) +----------- + +These are a precursor to C++ modules. pch are universally supported across compilers, where we may never see C++ modules. You get one pch per library.
So if your app is a DLL and an exe, then each could have their own pch. Need one pch per platform and config. Force include this since it must be the first file in each, or explicitly include a file if you want to be explicit about which files get the pch. + +pch spreads headers into files. So the build can break if some don't use it, or configs skip it. Occasionally fixup missing headers by disabling it. Templates are parsed but only specializations are instantiated. So may be worth defining specializations in the pch. STL is always a top offender with vector/unordered_map, function, and others at the top. + +There are broken examples of setting up pch for Makefiles all over the internet. Maybe cmake has a valid setup, but the gist is below for gcc/clang. Make sure to verify the parse time is gone in kram-profile by looking at the clang build profiles. + +Clang has options to generate a pch .o file. This must be linked separately into the library. This is something MSVC pch has supported for a long time. gcc doesn't support this. See the link below, and the pchObj in the makefile example below. + +Advanced clang pch usage +https://maskray.me/blog/2023-07-16-precompiled-headers + + + # gen the .d file, written to tmp and only replaces if it changes + cppFlags = ... + + cppDepFlags = -MMD -MP (or -MD) + + # header must be unique to build (f.e.
defines, etc) + cppBuild = $(platform)($config) + + # setup the files involved, only get 1 pch per DLL/App since + pchSrc = Precompile.h + pchHdrSrc = Precompile-$(cppBuild).h + pchDeps = $(pchHdr).d + pchHdr = $(pchHdrSrc).pch + pchObj = $(pchHdr).o + pchIncludesDirs = -Idir1 -Idir2 + + # this does code gen, templates, and debuginfo into the h.pch.o file + pchFlags = -fpch-codegen -fpch-instantiate-templates -fpch-debuginfo + + # important - only copy the hdr if it changes, don't want full rebuild every time + # linux (cp -u), win (xcopy), macOS (shell compare then cp) + $(pchHdrSrc): $(pchSrc) + cp $< $@ + + # this will output the .d and .gch file + $(pchHdr): $(pchHdrSrc) + clang++ -x c++-header $(cppFlags) $(cppDepFlags) $(pchFlags) $(pchIncludesDirs) -c $< -o $@ + + # this makes sure that the pch is rebuilt if hdrs within pchHdr change + # the - sign ignores the deps file on the first run where it does not exist. + $(pchDeps): ; + -include $(pchDeps) + + # optional code to build .o from .pch + # must link this in with the lib/exe, don't use "-x c++" here - it's ast not C++ code + # speeds the build, since code isn't prepended to each .o file, and then linked. + $(pchObj): $(pchHdr) + clang++ $(cppFlags) -c $< -o $@ + + .... + + # prefix Precompile.h.pch to each .o file + cppPchFlags = -include-pch $(pchHdr) + + # now build the files + *.cpp: ... $(pchHdr) + clang++ $(cppFlags) $(cppPchFlags) -c $< -o $@ + + # link the pchObj into the lib or exe + allObjs = *.o $(pchObj) + + $(libOrExe): $(allObjs) + clang++ $< -o $@ + + +SIMD +----------- + +Vector instructions are universal now via SIMD. For 16B SIMD, ARM has Neon and x64 has SSE4.2. AVX/2 introduce 32B, and AVX-512 is 64B registers, but Intel has stripped that from newer consumer chips, and is introducing AVX10. So AVX2 is as safe as it gets. Note that Apple's Rosetta 2 emulator only supports SSE 4.2 at the time of this writing.
x64 SSE is always 16B size and 16B aligned, where Neon has an 8B float32x2 and 16B float32x4. The default allocator for macOS is 16B aligned. x64 is 16B aligned, but x86 was 8B aligned. + +Apple has a very nice SIMD (simd/simd.h) library. This uses the gcc vector extensions so swizzles and math operators are built into the compiler. This makes the code look more HLSL-like which is a good thing. This ships with all calls inline, but optimized 2/3/4 way transcendental calls are buried in the Accelerate library, and the implementation just calls the c stdlib functions multiple times as a fallback. It has a nice abstraction for int, uint, float, double simd math. One defines the maximum SIMD level supported by the app, and the library then uses the largest register size that it can for that platform. The higher size registers work with 16B alignment, so that is what Apple uses. + +Optimized debug builds +----------- + +One nice aspect of C++ is that specific files can be optimized. But to do so, calls outside the .cpp become functions instead of inlines. But within the .cpp, they get inlined and optimized. Setting this up on a SIMD library takes a bit of work, but then callers are running optimized math even in debug. + +Also Microsoft has various debug build flags that can optimize and optimize force_inline calls. Need to find out the details for clang. These disable Edit & Continue, but clang in Visual Studio doesn't support it anyways. + +* https://learn.microsoft.com/en-us/visualstudio/debugger/how-to-debug-optimized-code?view=vs-2022 + +* Visual Studio +* Use /Zo instead of /Od. Now with Edit&continue. +* /d2Zi+ +* Use VS2022, it's 64-bit +* Avoid C++20, it's slower to compile +* /Ob1 allows inline of inline, __inline, or __forceinline, and member functions in the class decls.
+* disable STL bounds checking +* WIN32_LEAN_AND_MEAN +* NOMINMAX +* use clang-cl + +Xcode +* make sure to deadstrip the release build, or it's huge +* Cmake uses /Ob1 for RelWithDebInfo +* use SSE4.2 for Rosetta 2, and make sure to use Neon on arm64 + +* https://randomascii.wordpress.com/2013/09/11/debugging-optimized-codenew-in-visual-studio-2012/ + +* https://dirtyhandscoding.github.io/posts/fast-debug-in-visual-c.html + + + + + + diff --git a/kram-profile/Source/KramZipHelper.cpp b/kram-profile/Source/KramZipHelper.cpp new file mode 100644 index 00000000..ddf9b889 --- /dev/null +++ b/kram-profile/Source/KramZipHelper.cpp @@ -0,0 +1,457 @@ +#include "KramZipHelper.h" + +#include +//#include // for copy_if on Win +#include +#include +#include +#include + +#include "miniz.h" + +// test for perf of this compared to one in miniz also see +// comments about faster algs. +// libcompress can only encode lvl 5, but here it's only decompress. +// This seems to fail when used for kramv zip archives, so disable fo now +#ifndef USE_LIBCOMPRESSION +#define USE_LIBCOMPRESSION 0 // (KRAM_MAC || KRAM_IOS) +#endif + +#if USE_LIBCOMPRESSION +#include +#endif + +// Throwing this in for now, since it's the only .cpp file +#if KRAM_MAC || KRAM_IOS +#include // demangle +#endif + +using namespace STL_NAMESPACE; + +// copied out of KramLog.cpp +static int32_t append_vsprintf(string& str, const char* format, va_list args) +{ + // for KLOGE("group", "%s", "text") + if (strcmp(format, "%s") == 0) { + const char* firstArg = va_arg(args, const char*); + str += firstArg; + return (int32_t)strlen(firstArg); + } + + // This is important for the case where ##VAR_ARGS only leaves the format. + // In this case "text" must be a compile time constant string to avoid security warning needed for above.
+ // for KLOGE("group", "text") + if (strrchr(format, '%') == nullptr) { + str += format; + return (int32_t)strlen(format); + } + + // format once to get length (without NULL at end) + va_list argsCopy; + va_copy(argsCopy, args); + int32_t len = vsnprintf(NULL, 0, format, argsCopy); + va_end(argsCopy); + + if (len > 0) { + size_t existingLen = str.length(); + + // resize and format again into string + str.resize(existingLen + len, 0); + + vsnprintf((char*)str.c_str() + existingLen, len + 1, format, args); + } + + return len; +} + +int32_t append_sprintf(string& str, const char* format, ...) +{ + va_list args; + va_start(args, format); + int32_t len = append_vsprintf(str, format, args); + va_end(args); + + return len; +} + +// This is extracted from CBA Analysis.cpp +extern "C" const char* _Nullable collapseFunctionName(const char* _Nonnull name_) +{ + // Adapted from code in Analysis. Really the only call needed from CBA. + // serialize to multiple threads + static mutex sMutex; + static unordered_map sMap; + lock_guard lock(sMutex); + + string elt(name_); + auto it = sMap.find(elt); + if (it != sMap.end()) { + return it->second.c_str(); + } + + // Parsing op<, op<<, op>, and op>> seems hard. 
Just skip'm all + if (strstr(name_, "operator") != nullptr) + return nullptr; + + std::string retval; + retval.reserve(elt.size()); + auto b_range = elt.begin(); + auto e_range = elt.begin(); + while (b_range != elt.end()) { + e_range = std::find(b_range, elt.end(), '<'); + if (e_range == elt.end()) + break; + ++e_range; + retval.append(b_range, e_range); + retval.append("$"); + b_range = e_range; + int open_count = 1; + // find the matching close angle bracket + for (; b_range != elt.end(); ++b_range) { + if (*b_range == '<') { + ++open_count; + continue; + } + if (*b_range == '>') { + if (--open_count == 0) { + break; + } + continue; + } + } + // b_range is now pointing at a close angle, or it is at the end of the string + } + if (b_range > e_range) { + // we are in a wacky case where something like op> showed up in a mangled name. + // just bail. + // TODO: this still isn't correct, but it avoids crashes. + return nullptr; + } + // append the footer + retval.append(b_range, e_range); + + // add it to the map + sMap[elt] = std::move(retval); + + return sMap[elt].c_str(); +} + +extern "C" const char* _Nullable demangleSymbolName(const char* _Nonnull symbolName_) +{ + // serialize to multiple threads + static mutex sMutex; + static unordered_map sSymbolToDemangleName; + lock_guard lock(sMutex); + + string symbolName(symbolName_); + auto it = sSymbolToDemangleName.find(symbolName); + if (it != sSymbolToDemangleName.end()) { + return it->second; + } + + // see CBA if want a generalized demangle for Win/Linux + size_t size = 0; + int status = 0; + char* symbol = abi::__cxa_demangle(symbolName.c_str(), nullptr, &size, &status); + const char* result = nullptr; + if (status == 0) { + sSymbolToDemangleName[symbolName] = symbol; + result = symbol; + // not freeing the symbols here + //free(symbol); + } + else { + // This will do repeated demangle though. Maybe should add to table? 
+ // Swift fails when returning back the string it marshalled back to stuff back + // into String(cstring: ...). Ugh. So return empty string. + // status = -2 on most of the mangled Win clang-cli symbols. Nice one + // Microsoft. + //result = symbolName_; + + result = nullptr; + } + + return result; +} + +namespace kram { +using namespace STL_NAMESPACE; + +// Copied out of KramLog.cpp +inline bool endsWithExtension(const char* str, const string& substring) +{ + const char* search = strrchr(str, '.'); + if (search == NULL) { + return false; + } + + return strcmp(search, substring.c_str()) == 0; +} + +ZipHelper::ZipHelper() +{ +} + +ZipHelper::~ZipHelper() +{ + close(); +} + +bool ZipHelper::openForRead(const uint8_t* zipData_, uint64_t zipDataSize) +{ + zipData = zipData_; + + zip = std::make_unique(); + mz_zip_zero_struct(zip.get()); + + mz_uint flags = 0; + mz_bool success = mz_zip_reader_init_mem(zip.get(), zipData, zipDataSize, flags); + if (!success) { + close(); + return false; + } + + initZipEntryTables(); + return true; +} + +void ZipHelper::filterExtensions(const vector& extensions) +{ + vector zipEntrysFiltered; + + std::copy_if(_zipEntrys.begin(), _zipEntrys.end(), std::back_inserter(zipEntrysFiltered), [&extensions](const auto& zipEntry) { + for (const auto& ext : extensions) { + if (endsWithExtension(zipEntry.filename, ext)) { + return true; + } + } + return false; + }); + + _zipEntrys = zipEntrysFiltered; +} + +void ZipHelper::close() +{ + if (zip != nullptr) { + mz_zip_end(zip.get()); + zip.reset(); + } +} + +void ZipHelper::initZipEntryTables() +{ + int32_t numFiles = mz_zip_reader_get_num_files(zip.get()); + + // allocate array to hold all filenames in one block of memory + uint64_t totalFilenameSizes = 0; + for (int32_t i = 0; i < numFiles; ++i) { + totalFilenameSizes += mz_zip_reader_get_filename(zip.get(), i, nullptr, 0); + } + + const uint32_t* remappedIndices = mz_zip_reader_sorted_file_indices(zip.get()); + + 
allFilenames.resize(totalFilenameSizes); + + // allocate an array with the data from the archive that we care about + _zipEntrys.resize(numFiles); + + int32_t index = 0; + uint64_t length = 0; + + for (int32_t i = 0; i < numFiles; ++i) { + uint32_t sortedFileIndex = remappedIndices[i]; + + // file_stat does quite a bit of work, but only want a few fields out of it + mz_zip_archive_file_stat stat; + mz_zip_reader_file_stat(zip.get(), sortedFileIndex, &stat); + if (stat.m_is_directory || !stat.m_is_supported) { + continue; + } + + // skipping directories and unsupported items + // also the ordering here is in filename not fileIndex order + + // copy all filenames into fixed storage that's all + // contguous, so that can alis the strings for lookup + uint64_t filenameLength = std::min((uint64_t)512, (uint64_t)strlen(stat.m_filename) + 1); + char* filename = &allFilenames[length]; + strncpy(filename, stat.m_filename, filenameLength); + length += filenameLength; + + ZipEntry& zipEntry = _zipEntrys[index]; + zipEntry.fileIndex = stat.m_file_index; + zipEntry.filename = filename; // can alias + zipEntry.uncompressedSize = stat.m_uncomp_size; + zipEntry.compressedSize = stat.m_comp_size; + zipEntry.modificationDate = (int32_t)stat.m_time; // really a time_t +#undef crc32 + zipEntry.crc32 = stat.m_crc32; + + // TODO: stat.m_time, state.m_crc32 + + index++; + } + + // resize, since entries and filenames were skipped + // this should change the addresses used above + allFilenames.resize(length); + _zipEntrys.resize(index); +} + +int32_t ZipHelper::zipEntryIndex(const char* name) const +{ + // central directory is sorted, so this does binary search on entries + return mz_zip_reader_locate_file(zip.get(), name, "", 0); +} + +const ZipEntry* ZipHelper::zipEntry(const char* name) const +{ + int32_t index = zipEntryIndex(name); + if (index < 0) { + return nullptr; + } + + // have to find the zipEntry, have skipped and sorted entries by filename + // the array build skips 
directories, so those can throw off the fileIndex + // TODO: do a binary search here, and don't use mz_zip call? + + int32_t numEntries = (int32_t)_zipEntrys.size(); + for (int32_t i = 0; i < numEntries; ++i) { + if (_zipEntrys[i].fileIndex == index) { + return &_zipEntrys[i]; + } + } + + return nullptr; +} + +bool ZipHelper::extract(const char* filename, vector& buffer) const +{ + auto entry = zipEntry(filename); + if (!entry) { + return false; + } + + buffer.resize(entry->uncompressedSize); + if (!extract(*entry, buffer.data(), buffer.size())) { + return false; + } + + return true; +} + +bool ZipHelper::extract(const char* filename, uint8_t* bufferData, uint64_t bufferDataSize) const +{ + auto entry = zipEntry(filename); + if (!entry) { + return false; + } + + if (bufferDataSize < entry->uncompressedSize) { + return false; + } + + if (!extract(*entry, bufferData, bufferDataSize)) { + return false; + } + + return true; +} + +bool ZipHelper::extractPartial(const char* filename, vector& buffer) const +{ + if (buffer.empty()) { + assert(false); + return false; + } + + auto entry = zipEntry(filename); + if (!entry) { + return false; + } + + if (buffer.size() > entry->uncompressedSize) { + return false; + } + + bool success = false; + + mz_zip_reader_extract_iter_state* iter = mz_zip_reader_extract_iter_new(zip.get(), entry->fileIndex, 0); + uint64_t bytesRead = mz_zip_reader_extract_iter_read(iter, buffer.data(), buffer.size()); + if (bytesRead == buffer.size()) { + success = true; + } + mz_zip_reader_extract_iter_free(iter); + return success; +} + +bool ZipHelper::extract(const ZipEntry& entry, void* buffer, uint64_t bufferSize) const +{ + // Some more info on doing deflate on M1 + // https://aras-p.info/blog/2021/08/09/EXR-libdeflate-is-great/ + // https://dougallj.wordpress.com/2022/08/20/faster-zlib-deflate-decompression-on-the-apple-m1-and-x86/ + + // https://developer.apple.com/documentation/compression/1481000-compression_decode_buffer?language=objc + + // This 
call is internal, so caller has already tested failure cases. + +#if USE_LIBCOMPRESSION + const uint8_t* data = mz_zip_reader_get_raw_data(zip.get(), entry.fileIndex); + if (!data) { + return false; + } + // need to extra data and header + char scratchBuffer[compression_decode_scratch_buffer_size(COMPRESSION_ZLIB)]; + + uint64_t bytesDecoded = compression_decode_buffer( + (uint8_t*)buffer, entry.uncompressedSize, + (const uint8_t*)data, entry.compressedSize, + scratchBuffer, + COMPRESSION_ZLIB); + + bool success = false; + if (bytesDecoded == entry.uncompressedSize) { + success = true; + } +#else + + // this pulls pages from mmap, no allocations + mz_bool success = mz_zip_reader_extract_to_mem( + zip.get(), entry.fileIndex, buffer, bufferSize, 0); +#endif + + return success; +} + +// uncompressed content in the archive can be aliased directly by offset into the archive +bool ZipHelper::extractRaw(const char* filename, const uint8_t** bufferData, uint64_t& bufferDataSize) const +{ + auto entry = zipEntry(filename); + if (!entry) { + return false; + } + + mz_zip_archive_file_stat stat; + mz_zip_reader_file_stat(zip.get(), entry->fileIndex, &stat); + if (stat.m_is_directory || !stat.m_is_supported) { + return false; + } + + // this should really be cached with zipEntry data + const uint8_t* data = mz_zip_reader_get_raw_data(zip.get(), entry->fileIndex); + if (!data) { + return false; + } + + *bufferData = data; + + // This isn't correct, need to return comp_size. + // Caller may need the uncompressed size though to decompress fully into. 
+ //bufferDataSize = stat.m_uncomp_size; + bufferDataSize = stat.m_comp_size; + + return true; +} + +} // namespace kram diff --git a/kram-profile/Source/KramZipHelper.h b/kram-profile/Source/KramZipHelper.h new file mode 100644 index 00000000..0dd7d13c --- /dev/null +++ b/kram-profile/Source/KramZipHelper.h @@ -0,0 +1,82 @@ +#pragma once + +// TODO: move to KramConfig.h +#define KRAM_MAC 1 +#define KRAM_IOS 0 +#define STL_NAMESPACE std + +#include + +#include +#include +#include + +// from miniz +// had to change miniz from anonymous struct typedef, or can't fwd declare +struct mz_zip_archive; + +namespace kram { + +//struct MmapHelper; +using namespace STL_NAMESPACE; + +struct ZipEntry { + const char* filename; // max 512, aliased + int32_t fileIndex; + + // attributes + uint64_t uncompressedSize; + uint64_t compressedSize; + int32_t modificationDate; + uint32_t crc32; +}; + +// this does very fast zip archive reading via miniz and mmap +// provides data structures to help lookup content +struct ZipHelper { + ZipHelper(); + ~ZipHelper(); + + bool openForRead(const uint8_t* zipData, uint64_t zipDataSize); + void close(); + + // Only keep entries that match the extensions provided + void filterExtensions(const vector& extensions); + + // buffer is resized if smaller, can use to lookat headers (f.e. 
ktx or mod) + // the zip decodes only the length of the buffer passed in, and this should be small + // since an iterator is called once to extract data + bool extractPartial(const char* filename, vector& buffer) const; + + // must read the entire contents (resizes vector) + bool extract(const char* filename, vector& buffer) const; + + // must read the entire contents + bool extract(const char* filename, uint8_t* bufferData, uint64_t bufferDataSize) const; + + // uncompressed content in the archive like ktx2 files can be aliased directly + // while referencing this data, don't close mmap() since bufferData is offset into that + bool extractRaw(const char* filename, const uint8_t** bufferData, uint64_t& bufferDataSize) const; + + const vector& zipEntrys() const { return _zipEntrys; } + + const ZipEntry* zipEntry(const char* name) const; + +private: + bool extract(const ZipEntry& fileIndex, void* buffer, uint64_t bufferSize) const; + + void initZipEntryTables(); + + // returns -1 if file not found, does binary search off sorted names + // to find fileIndex, then lookups the array index from that + int32_t zipEntryIndex(const char* name) const; + +private: + std::unique_ptr zip; + vector _zipEntrys; + + const uint8_t* zipData; // aliased + + vector allFilenames; +}; +} // namespace kram diff --git a/kram-profile/Source/KramZipHelperW.h b/kram-profile/Source/KramZipHelperW.h new file mode 100644 index 00000000..72cfe2c2 --- /dev/null +++ b/kram-profile/Source/KramZipHelperW.h @@ -0,0 +1,42 @@ +#pragma once + +#import + +typedef struct ZipEntryW { + const char* _Nonnull filename; // max 512, aliased + int32_t fileIndex; + + // attributes + uint64_t uncompressedSize; + uint64_t compressedSize; + int32_t modificationDate; + uint32_t crc32; +} ZipEntryW; + +// Use this to bridge the C++ over to Swift for now +// TODO: form a clang module and reference C++ directly +@interface ZipHelperW : NSObject +- (nonnull instancetype)initWithData:(nonnull NSData*)data; + +// extract 
// Objective-C wrapper that exposes kram::ZipHelper to Swift.
@implementation ZipHelperW {
    ZipHelper _helper;

    // _helper only aliases the archive bytes, so hold the data for as long
    // as the wrapper lives.  (assumes ARC, so this ivar is a strong ref)
    NSData* _data;
}

// Opens the zip archive held in data.  If the archive can't be read the
// object is still returned, with zero entries.
- (nonnull instancetype)initWithData:(nonnull NSData*)data
{
    self = [super init];
    if (self) {
        // copy is a retain for immutable data, and protects against the
        // caller mutating an NSMutableData underneath the helper
        _data = [data copy];
        _helper.openForRead((const uint8_t*)_data.bytes, _data.length);
    }
    return self;
}

// Returns the file contents, or nil if the file is missing or can't be read.
// Compressed entries are decoded into new memory.  Entries whose compressed
// and uncompressed sizes match are returned as a no-copy view into the
// archive, so that data must not outlive this object.
- (nullable NSData*)extract:(nonnull const char*)filename
{
    auto entry = _helper.zipEntry(filename);
    if (!entry) {
        return nil;
    }

    bool isCompressed = entry->uncompressedSize != entry->compressedSize;
    if (isCompressed) {
        // this allocates memory
        NSMutableData* data = [NSMutableData dataWithLength:entry->uncompressedSize];
        if (!data) {
            return nil;
        }

        // write through mutableBytes, bytes is the read-only pointer
        if (!_helper.extract(filename, (uint8_t*)data.mutableBytes, data.length)) {
            return nil;
        }
        return data;
    }

    const uint8_t* bytes = nullptr;
    uint64_t bytesLength = 0;

    // this aliases the archive
    if (!_helper.extractRaw(filename, &bytes, bytesLength)) {
        return nil;
    }
    return [NSData dataWithBytesNoCopy:(void*)bytes length:bytesLength freeWhenDone:NO];
}

// Need this for the list data
- (nonnull const ZipEntryW*)zipEntrys
{
    // The casts in this class treat kram::ZipEntry and ZipEntryW as the same
    // struct, so at least catch a size mismatch at compile time.
    static_assert(sizeof(ZipEntryW) == sizeof(ZipEntry), "ZipEntryW must mirror kram::ZipEntry");

    return (const ZipEntryW*)_helper.zipEntrys().data();
}

- (NSInteger)zipEntrysCount
{
    return _helper.zipEntrys().size();
}

// index is a position in the zipEntrys array, not the zip fileIndex.
// An out-of-range index returns an entry with an empty filename.
- (ZipEntryW)zipEntry:(NSInteger)index
{
    static ZipEntryW nilEntry = {""};

    const auto& entries = _helper.zipEntrys();
    if (index < 0 || (size_t)index >= entries.size()) {
        return nilEntry;
    }
    return *(const ZipEntryW*)&entries[index];
}

// Returns an entry with an empty filename when name isn't in the archive,
// since zips can be missing files from one iteration to the next.
- (ZipEntryW)zipEntryByName:(nonnull const char*)name
{
    static ZipEntryW nilEntry = {""};

    const ZipEntry* entry = _helper.zipEntry(name);
    if (entry) {
        return *(const ZipEntryW*)entry;
    }
    return nilEntry;
}

@end
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + +#include "miniz.h" + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- zlib-style API's */ + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); + size_t block_len = buf_len % 5552; + if (!ptr) + return MZ_ADLER32_INIT; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0]; s2 += s1; + s1 += ptr[1]; s2 += s1; + s1 += ptr[2]; s2 += s1; + s1 += ptr[3]; s2 += s1; + s1 += ptr[4]; s2 += s1; + s1 += ptr[5]; s2 += s1; + s1 += ptr[6]; s2 += s1; + s1 += ptr[7]; s2 += s1; + } + for (; i < block_len; ++i) { + s1 += *ptr++; s2 += s1; + } + s1 %= 65521U; s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + return (s2 << 16) + s1; +} + +/* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ */ +#if 0 + mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) + { + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) + return MZ_CRC32_INIT; + crcu32 = ~crcu32; + while (buf_len--) + { + mz_uint8 b = *ptr++; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; + crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; + } + return ~crcu32; + } +#else +/* Faster, but larger CPU cache footprint. 
+ */ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc_table[256] = + { + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, + 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, + 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, + 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, + 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, + 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, + 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, + 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, + 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, + 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, + 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, + 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, + 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, + 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, + 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, + 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, + 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, + 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, + 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, + 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 
/* Default allocation callback: allocates room for `items` elements of `size`
   bytes each via MZ_MALLOC.  `opaque` is unused.  Returns NULL if the
   allocation fails or if items * size does not fit in a size_t. */
void *miniz_def_alloc_func(void *opaque, size_t items, size_t size)
{
    (void)opaque, (void)items, (void)size;
    /* Guard the multiply: a wrapped product would allocate a block smaller
       than the caller believes it was given. */
    if ((size != 0) && (items > ((size_t)-1) / size))
        return NULL;
    return MZ_MALLOC(items * size);
}
*)pStream->state, NULL, NULL, ((tdefl_compressor *)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) + return MZ_STREAM_ERROR; + if (!pStream->avail_out) + return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) + flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor *)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; + orig_total_out = pStream->total_out; + for (;;) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor *)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; /* Can't make forward progress without some input. 
+ */ + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + /* This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) */ + return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + /* In case mz_ulong is 64-bits (argh I hate longs). */ + if ((source_len | *pDest_len) > 0xFFFFFFFFU) + return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) + return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? 
MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; + int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) + return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) + return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) + pStream->zalloc = miniz_def_alloc_func; + if (!pStream->zfree) + pStream->zfree = miniz_def_free_func; + + pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); + if (!pDecomp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflateReset(mz_streamp pStream) +{ + inflate_state *pDecomp; + if (!pStream) + return MZ_STREAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + + pDecomp = 
(inflate_state *)pStream->state; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + /* pDecomp->m_window_bits = window_bits */; + + return MZ_OK; +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state *pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) + return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) + flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + + pState = (inflate_state *)pStream->state; + if (pState->m_window_bits > 0) + decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; + pState->m_first_call = 0; + if (pState->m_last_status < 0) + return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) + return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + /* MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
*/ + decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; + out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; + pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + /* flush != MZ_FINISH then we must assume there's more input. */ + if (flush != MZ_FINISH) + decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for (;;) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; + pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; + pStream->avail_out -= n; + pStream->total_out += n; + pState->m_dict_avail -= n; + pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; /* Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). */ + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. */ + else if (flush == MZ_FINISH) + { + /* The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. */ + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. */ + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + /* In case mz_ulong is 64-bits (argh I hate longs). */ + if ((source_len | *pDest_len) > 0xFFFFFFFFU) + return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct + { + int m_err; + const char *m_pDesc; + } s_error_descs[] = + { + { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; + for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) + if (s_error_descs[i].m_err == err) + return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif /*MINIZ_NO_ZLIB_APIS */ + +#ifdef __cplusplus +} +#endif + +/* + This is free and unencumbered software released into the public domain. 
+ + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <http://unlicense.org/> +*/ +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + **************************************************************************/ + + + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- Low-level Compression (independent from all decompression API's) */ + +/* Purposely making these tables static for faster init and thread safety. */ +static const mz_uint16 s_tdefl_len_sym[256] = + { + 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, + 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, + 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, + 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, + 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, + 284, 284, 
284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 + }; + +static const mz_uint8 s_tdefl_len_extra[256] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 + }; + +static const mz_uint8 s_tdefl_small_dist_sym[512] = + { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 + }; + +static const mz_uint8 s_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + +static const mz_uint8 s_tdefl_large_dist_sym[128] = + { + 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 + }; + +static const mz_uint8 s_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + +/* Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values. 
*/ +typedef struct +{ + mz_uint16 m_key, m_sym_index; +} tdefl_sym_freq; +static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq *pSyms0, tdefl_sym_freq *pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; + tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; + MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) + { + mz_uint freq = pSyms0[i].m_key; + hist[freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) + total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32 *pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + } + for (i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { + tdefl_sym_freq *t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + } + return pCur_syms; +} + +/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
 */
+static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
+{ /* Rewrites A[0..n-1].m_key in place. The caller in this file passes radix-sorted symbol counts and afterwards reads m_key as a code length. */
+    int root, leaf, next, avbl, used, dpth;
+    if (n == 0) /* empty table: nothing to assign */
+        return;
+    else if (n == 1)
+    {
+        A[0].m_key = 1; /* a single symbol is given length 1 */
+        return;
+    }
+    A[0].m_key += A[1].m_key; /* Pass 1: the first internal node is the sum of the first two keys. */
+    root = 0; /* index of the oldest internal node not yet merged */
+    leaf = 2; /* index of the next leaf not yet merged */
+    for (next = 1; next < n - 1; next++) /* each iteration builds internal node `next` from two picks */
+    {
+        if (leaf >= n || A[root].m_key < A[leaf].m_key) /* first pick: internal node if no leaf is left or its key is smaller */
+        {
+            A[next].m_key = A[root].m_key;
+            A[root++].m_key = (mz_uint16)next; /* the consumed node now stores the index of its parent */
+        }
+        else /* otherwise the first pick is the next leaf */
+            A[next].m_key = A[leaf++].m_key;
+        if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) /* second pick, same rule; root must stay below the node being built */
+        {
+            A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
+            A[root++].m_key = (mz_uint16)next; /* parent index again replaces the consumed key */
+        }
+        else
+            A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
+    }
+    A[n - 2].m_key = 0; /* Pass 2: the last internal node is the tree root, depth 0. */
+    for (next = n - 3; next >= 0; next--)
+        A[next].m_key = A[A[next].m_key].m_key + 1; /* depth = depth of parent + 1 (m_key held the parent index) */
+    avbl = 1; /* Pass 3: avbl = node slots available at depth dpth */
+    used = dpth = 0;
+    root = n - 2; /* walks the internal-node depths downwards from the root */
+    next = n - 1; /* leaf depths are written from the end of the array towards the front */
+    while (avbl > 0)
+    {
+        while (root >= 0 && (int)A[root].m_key == dpth) /* count the internal nodes that sit at this depth */
+        {
+            used++;
+            root--;
+        }
+        while (avbl > used) /* every remaining slot at this depth is a leaf */
+        {
+            A[next--].m_key = (mz_uint16)(dpth);
+            avbl--;
+        }
+        avbl = 2 * used; /* each internal node opens two slots one level deeper */
+        dpth++;
+        used = 0;
+    }
+}
+
+/* Limits canonical Huffman code table's max code size. 
*/ +enum +{ + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 +}; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; + mz_uint32 total = 0; + if (code_list_len <= 1) + return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) + pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) + total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) + if (pNum_codes[i]) + { + pNum_codes[i]--; + pNum_codes[i + 1] += 2; + break; + } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; + mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; + MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) + num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) + if (pSym_count[i]) + { + syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; + syms0[num_used_syms++].m_sym_index = (mz_uint16)i; + } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); + tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) + num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); + MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) + d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + 
next_code[1] = 0; + for (j = 0, i = 2; i <= code_size_limit; i++) + next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; + if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) + continue; + code = next_code[code_size]++; + for (l = code_size; l > 0; l--, code >>= 1) + rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) \ + do \ + { \ + mz_uint bits = b; \ + mz_uint len = l; \ + MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); \ + d->m_bits_in += len; \ + while (d->m_bits_in >= 8) \ + { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ + } \ + MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() \ + { \ + if (rle_repeat_count) \ + { \ + if (rle_repeat_count < 3) \ + { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) \ + packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } \ + else \ + { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 16; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ + } \ + rle_repeat_count = 0; \ + } \ + } + +#define TDEFL_RLE_ZERO_CODE_SIZE() \ + { \ + if (rle_z_count) \ + { \ + if (rle_z_count < 3) \ + { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ + while (rle_z_count--) \ + packed_code_sizes[num_packed_code_sizes++] = 0; \ + } \ + else if (rle_z_count <= 10) \ + { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 17; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } \ + else \ + { \ + 
d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); \ + packed_code_sizes[num_packed_code_sizes++] = 18; \ + packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ + } \ + rle_z_count = 0; \ + } \ + } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; + mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) + if (d->m_huff_code_sizes[0][num_lit_codes - 1]) + break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) + if (d->m_huff_code_sizes[1][num_dist_codes - 1]) + break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; + num_packed_code_sizes = 0; + rle_z_count = 0; + rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) + { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = 
(mz_uint16)(d->m_huff_count[2][code_size] + 1); + packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) + if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) + break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); + TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) + TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; + MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) + TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) + *p++ = 8; + for (; i <= 255; ++i) + *p++ = 9; + for (; i <= 279; ++i) + *p++ = 7; + for (; i <= 287; ++i) + *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && 
MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) \ + { \ + bit_buffer |= (((mz_uint64)(b)) << bits_in); \ + bits_in += (l); \ + } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + /* This sequence coaxes MSVC into using cmov's vs. jmp's. */ + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64 *)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); + pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; + num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; + num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS */ + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); + TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; + saved_bit_buf = d->m_bit_buffer; + saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + /* If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. */ + if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) + { + mz_uint i; + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf; d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + /* Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
*/ + else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; + d->m_bit_buffer = saved_bit_buf; d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) + { + mz_uint i, a = d->m_adler32; + for (i = 0; i < 4; i++) + { + TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); + a <<= 8; + } + } + } + else + { + mz_uint i, z = 0; + TDEFL_PUT_BITS(0, 3); + if (d->m_bits_in) + { + TDEFL_PUT_BITS(0, 8 - d->m_bits_in); + } + for (i = 2; i; --i, z ^= 0xFFFF) + { + TDEFL_PUT_BITS(z & 0xFFFF, 16); + } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; + d->m_total_lz_bytes = 0; + d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#ifdef 
MINIZ_UNALIGNED_USE_MEMCPY +static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p) +{ + mz_uint16 ret; + memcpy(&ret, p, sizeof(mz_uint16)); + return ret; +} +#else +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p) +#define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p) +#endif +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) + return; + for (;;) + { + for (;;) + { + if (--num_probes_left == 0) + return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) + break; + q = (const mz_uint16 *)(d->m_dict + probe_pos); + if (TDEFL_READ_UNALIGNED_WORD2(q) != s01) + continue; + p = s; + probe_len = 32; + do + { + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && 
(--probe_len > 0)); + if (!probe_len) + { + *pMatch_dist = dist; + *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN); + break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q)) > match_len) + { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) + break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); + if (max_match_len <= match_len) + return; + for (;;) + { + for (;;) + { + if (--num_probes_left == 0) + return; +#define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ + return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \ + break; + TDEFL_PROBE; + TDEFL_PROBE; + TDEFL_PROBE; + } + if (!dist) + break; + p = s; + q = d->m_dict + probe_pos; + for (probe_len = 0; probe_len < max_match_len; probe_len++) + if (*p++ != *q++) + break; + if (probe_len > match_len) + { + *pMatch_dist = dist; + if ((*pMatch_len = match_len = probe_len) == max_match_len) + return; + c0 = d->m_dict[pos + match_len]; + c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */ + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && 
MINIZ_LITTLE_ENDIAN +#ifdef MINIZ_UNALIGNED_USE_MEMCPY +static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p) +{ + mz_uint32 ret; + memcpy(&ret, p, sizeof(mz_uint32)); + return ret; +} +#else +#define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32 *)(p) +#endif +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + /* Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. */ + mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) + break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = 
TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do + { + } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && + (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0)); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? 
TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); +#ifdef MINIZ_UNALIGNED_USE_MEMCPY + memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist)); +#else + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; +#endif + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) + { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) + { + num_flags_left = 8; + pLZ_flags = pLZ_code_buf++; + } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; + pLZ_code_buf = d->m_pLZ_code_buf; + pLZ_flags = d->m_pLZ_flags; + num_flags_left = d->m_num_flags_left; + } + } + } + + d->m_lookahead_pos = lookahead_pos; + d->m_lookahead_size = lookahead_size; + d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; + d->m_pLZ_code_buf = pLZ_code_buf; + d->m_pLZ_flags = pLZ_flags; + d->m_num_flags_left = num_flags_left; + return MZ_TRUE; +} +#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ + +static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit) +{ + d->m_total_lz_bytes++; + *d->m_pLZ_code_buf++ = lit; + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); + if (--d->m_num_flags_left == 0) + { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + d->m_huff_count[0][lit]++; +} + +static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist) +{ + mz_uint32 s0, s1; + + MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE)); + + d->m_total_lz_bytes += match_len; + + d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); + + match_dist -= 1; + d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); + d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); + d->m_pLZ_code_buf += 3; + + *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); + if (--d->m_num_flags_left == 0) + { + d->m_num_flags_left = 8; + d->m_pLZ_flags = d->m_pLZ_code_buf++; + } + + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; + d->m_huff_count[1][(match_dist < 512) ? 
s0 : s1]++; + + if (match_len >= TDEFL_MIN_MATCH_LEN) + d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; +} + +static mz_bool tdefl_compress_normal(tdefl_compressor *d) +{ + const mz_uint8 *pSrc = d->m_pSrc; + size_t src_buf_left = d->m_src_buf_left; + tdefl_flush flush = d->m_flush; + + while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) + { + mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; + /* Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. */ + if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) + { + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; + mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); + const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; + src_buf_left -= num_bytes_to_process; + d->m_lookahead_size += num_bytes_to_process; + while (pSrc != pSrc_end) + { + mz_uint8 c = *pSrc++; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + ins_pos++; + } + } + else + { + while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + { + mz_uint8 c = *pSrc++; + mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + src_buf_left--; + d->m_dict[dst_pos] = c; + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; + if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) + { + 
mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; + mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); + d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)(ins_pos); + } + } + } + d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); + if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) + break; + + /* Simple lazy/greedy parsing state machine. */ + len_to_move = 1; + cur_match_dist = 0; + cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); + cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) + { + if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) + { + mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; + cur_match_len = 0; + while (cur_match_len < d->m_lookahead_size) + { + if (d->m_dict[cur_pos + cur_match_len] != c) + break; + cur_match_len++; + } + if (cur_match_len < TDEFL_MIN_MATCH_LEN) + cur_match_len = 0; + else + cur_match_dist = 1; + } + } + else + { + tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len); + } + if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) + { + cur_match_dist = cur_match_len = 0; + } + if (d->m_saved_match_len) + { + if (cur_match_len > d->m_saved_match_len) + { + tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); + if (cur_match_len >= 128) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + d->m_saved_match_len = 0; + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[cur_pos]; + d->m_saved_match_dist = cur_match_dist; + 
d->m_saved_match_len = cur_match_len; + } + } + else + { + tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); + len_to_move = d->m_saved_match_len - 1; + d->m_saved_match_len = 0; + } + } + else if (!cur_match_dist) + tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); + else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) + { + tdefl_record_match(d, cur_match_len, cur_match_dist); + len_to_move = cur_match_len; + } + else + { + d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; + d->m_saved_match_dist = cur_match_dist; + d->m_saved_match_len = cur_match_len; + } + /* Move the lookahead forward by len_to_move bytes. */ + d->m_lookahead_pos += len_to_move; + MZ_ASSERT(d->m_lookahead_size >= len_to_move); + d->m_lookahead_size -= len_to_move; + d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); + /* Check if it's time to flush the current LZ codes to the internal output buffer. */ + if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || + ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) + { + int n; + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE; + } + } + + d->m_pSrc = pSrc; + d->m_src_buf_left = src_buf_left; + return MZ_TRUE; +} + +static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) +{ + if (d->m_pIn_buf_size) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + } + + if (d->m_pOut_buf_size) + { + size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n); + d->m_output_flush_ofs += (mz_uint)n; + d->m_output_flush_remaining -= (mz_uint)n; + d->m_out_buf_ofs += n; + + *d->m_pOut_buf_size = d->m_out_buf_ofs; + } + + return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush) +{ + if (!d) + { + if (pIn_buf_size) + *pIn_buf_size = 0; + if (pOut_buf_size) + *pOut_buf_size = 0; + return TDEFL_STATUS_BAD_PARAM; + } + + d->m_pIn_buf = pIn_buf; + d->m_pIn_buf_size = pIn_buf_size; + d->m_pOut_buf = pOut_buf; + d->m_pOut_buf_size = pOut_buf_size; + d->m_pSrc = (const mz_uint8 *)(pIn_buf); + d->m_src_buf_left = pIn_buf_size ? 
*pIn_buf_size : 0; + d->m_out_buf_ofs = 0; + d->m_flush = flush; + + if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || + (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) + { + if (pIn_buf_size) + *pIn_buf_size = 0; + if (pOut_buf_size) + *pOut_buf_size = 0; + return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); + } + d->m_wants_to_finish |= (flush == TDEFL_FINISH); + + if ((d->m_output_flush_remaining) || (d->m_finished)) + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && + ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && + ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) + { + if (!tdefl_compress_fast(d)) + return d->m_prev_return_status; + } + else +#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */ + { + if (!tdefl_compress_normal(d)) + return d->m_prev_return_status; + } + + if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) + d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf); + + if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) + { + if (tdefl_flush_block(d, flush) < 0) + return d->m_prev_return_status; + d->m_finished = (flush == TDEFL_FINISH); + if (flush == TDEFL_FULL_FLUSH) + { + MZ_CLEAR_OBJ(d->m_hash); + MZ_CLEAR_OBJ(d->m_next); + d->m_dict_size = 0; + } + } + + return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); +} + +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush) +{ + MZ_ASSERT(d->m_pPut_buf_func); + return tdefl_compress(d, 
pIn_buf, &in_buf_size, NULL, NULL, flush); +} + +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + d->m_pPut_buf_func = pPut_buf_func; + d->m_pPut_buf_user = pPut_buf_user; + d->m_flags = (mz_uint)(flags); + d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; + d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; + d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) + MZ_CLEAR_OBJ(d->m_hash); + d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; + d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; + d->m_pLZ_flags = d->m_lz_code_buf; + d->m_num_flags_left = 8; + d->m_pOutput_buf = d->m_output_buf; + d->m_pOutput_buf_end = d->m_output_buf; + d->m_prev_return_status = TDEFL_STATUS_OKAY; + d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; + d->m_adler32 = 1; + d->m_pIn_buf = NULL; + d->m_pOut_buf = NULL; + d->m_pIn_buf_size = NULL; + d->m_pOut_buf_size = NULL; + d->m_flush = TDEFL_NO_FLUSH; + d->m_pSrc = NULL; + d->m_src_buf_left = 0; + d->m_out_buf_ofs = 0; + if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) + MZ_CLEAR_OBJ(d->m_dict); + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + return TDEFL_STATUS_OKAY; +} + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) +{ + return d->m_prev_return_status; +} + +mz_uint32 tdefl_get_adler32(tdefl_compressor *d) +{ + return d->m_adler32; +} + +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + tdefl_compressor *pComp; + 
mz_bool succeeded; + if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) + return MZ_FALSE; + pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + if (!pComp) + return MZ_FALSE; + succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); + succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); + MZ_FREE(pComp); + return succeeded; +} + +typedef struct +{ + size_t m_size, m_capacity; + mz_uint8 *m_pBuf; + mz_bool m_expandable; +} tdefl_output_buffer; + +static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser) +{ + tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; + size_t new_size = p->m_size + len; + if (new_size > p->m_capacity) + { + size_t new_capacity = p->m_capacity; + mz_uint8 *pNew_buf; + if (!p->m_expandable) + return MZ_FALSE; + do + { + new_capacity = MZ_MAX(128U, new_capacity << 1U); + } while (new_size > new_capacity); + pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); + if (!pNew_buf) + return MZ_FALSE; + p->m_pBuf = pNew_buf; + p->m_capacity = new_capacity; + } + memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); + p->m_size = new_size; + return MZ_TRUE; +} + +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_len) + return MZ_FALSE; + else + *pOut_len = 0; + out_buf.m_expandable = MZ_TRUE; + if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return NULL; + *pOut_len = out_buf.m_size; + return out_buf.m_pBuf; +} + +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tdefl_output_buffer out_buf; + MZ_CLEAR_OBJ(out_buf); + if (!pOut_buf) + return 0; + out_buf.m_pBuf = (mz_uint8 *)pOut_buf; + out_buf.m_capacity = out_buf_len; + if 
(!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) + return 0; + return out_buf.m_size; +} + +static const mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + +/* level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). */ +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) +{ + mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); + if (window_bits > 0) + comp_flags |= TDEFL_WRITE_ZLIB_HEADER; + + if (!level) + comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; + else if (strategy == MZ_FILTERED) + comp_flags |= TDEFL_FILTER_MATCHES; + else if (strategy == MZ_HUFFMAN_ONLY) + comp_flags &= ~TDEFL_MAX_PROBES_MASK; + else if (strategy == MZ_FIXED) + comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; + else if (strategy == MZ_RLE) + comp_flags |= TDEFL_RLE_MATCHES; + + return comp_flags; +} + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) */ +#endif + +/* Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at + http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. + This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. */ +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip) +{ + /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. 
*/ + static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 }; + tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); + tdefl_output_buffer out_buf; + int i, bpl = w * num_chans, y, z; + mz_uint32 c; + *pLen_out = 0; + if (!pComp) + return NULL; + MZ_CLEAR_OBJ(out_buf); + out_buf.m_expandable = MZ_TRUE; + out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); + if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) + { + MZ_FREE(pComp); + return NULL; + } + /* write dummy header */ + for (z = 41; z; --z) + tdefl_output_buffer_putter(&z, 1, &out_buf); + /* compress image data */ + tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); + for (y = 0; y < h; ++y) + { + tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); + tdefl_compress_buffer(pComp, (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); + } + if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) + { + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + /* write real header */ + *pLen_out = out_buf.m_size - 41; + { + static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 }; + mz_uint8 pnghdr[41] = { 0x89, 0x50, 0x4e, 0x47, 0x0d, + 0x0a, 0x1a, 0x0a, 0x00, 0x00, + 0x00, 0x0d, 0x49, 0x48, 0x44, + 0x52, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x49, 0x44, 0x41, + 0x54 }; + pnghdr[18] = (mz_uint8)(w >> 8); + pnghdr[19] = (mz_uint8)w; + pnghdr[22] = (mz_uint8)(h >> 8); + pnghdr[23] = (mz_uint8)h; + pnghdr[25] = chans[num_chans]; + pnghdr[33] = (mz_uint8)(*pLen_out >> 24); + pnghdr[34] = (mz_uint8)(*pLen_out >> 16); + pnghdr[35] = (mz_uint8)(*pLen_out >> 8); + pnghdr[36] = (mz_uint8)*pLen_out; + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); + for (i = 0; i < 4; ++i, c <<= 8) + 
((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); + memcpy(out_buf.m_pBuf, pnghdr, 41); + } + /* write footer (IDAT CRC-32, followed by IEND chunk) */ + if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) + { + *pLen_out = 0; + MZ_FREE(pComp); + MZ_FREE(out_buf.m_pBuf); + return NULL; + } + c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); + for (i = 0; i < 4; ++i, c <<= 8) + (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); + /* compute final size of file, grab compressed data buffer and return */ + *pLen_out += 57; + MZ_FREE(pComp); + return out_buf.m_pBuf; +} +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out) +{ + /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) */ + return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE); +} + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that */ +/* non-C language bindings to tdefL_ and tinfl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tdefl_compressor *tdefl_compressor_alloc() +{ + return (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); +} + +void tdefl_compressor_free(tdefl_compressor *pComp) +{ + MZ_FREE(pComp); +} +#endif + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#ifdef __cplusplus +} +#endif +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- Low-level Decompression (completely independent from all compression API's) */ + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN \ + switch (r->m_state) \ + { \ + case 0: +#define TINFL_CR_RETURN(state_index, result) \ + do \ + { \ + status = result; \ + r->m_state = state_index; \ + goto common_exit; \ + case state_index:; \ + } \ + MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) \ + do \ + { \ + for (;;) \ + { \ + TINFL_CR_RETURN(state_index, result); \ + } \ + } \ + MZ_MACRO_END +#define TINFL_CR_FINISH } + +#define TINFL_GET_BYTE(state_index, c) \ + do \ + { \ + while (pIn_buf_cur >= pIn_buf_end) \ + { \ + TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \ + } \ + c = *pIn_buf_cur++; \ + } \ + MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) \ + do \ + { \ + mz_uint c; \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) \ + do \ + { \ + if (num_bits < (mz_uint)(n)) \ + { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) \ + do \ + { \ + if (num_bits < (mz_uint)(n)) \ + { \ + TINFL_NEED_BITS(state_index, n); \ + } \ + b = bit_buf & ((1 << (n)) - 1); \ + bit_buf >>= (n); \ + num_bits -= (n); \ + } \ + MZ_MACRO_END + +/* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. */ +/* It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). 
It works by trying to fully decode a */ +/* Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the */ +/* bit buffer contains >=15 bits (deflate's max. Huffman code size). */ +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do \ + { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) \ + { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } \ + else if (num_bits > TINFL_FAST_LOOKUP_BITS) \ + { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do \ + { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); \ + if (temp >= 0) \ + break; \ + } \ + TINFL_GET_BYTE(state_index, c); \ + bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ + num_bits += 8; \ + } while (num_bits < 15); + +/* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read */ +/* beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully */ +/* decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. */ +/* The slow path is only executed at the very end of the input buffer. */ +/* v1.16: The original macro handled the case at the very end of the passed-in input buffer, but we also need to handle the case where the user passes in 1+zillion bytes */ +/* following the deflate data and our non-conservative read-ahead path won't kick in here on this code. This is much trickier. 
*/ +#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ + do \ + { \ + int temp; \ + mz_uint code_len, c; \ + if (num_bits < 15) \ + { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) \ + { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } \ + else \ + { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ + pIn_buf_cur += 2; \ + num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else \ + { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do \ + { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while (temp < 0); \ + } \ + sym = temp; \ + bit_buf >>= code_len; \ + num_bits -= code_len; \ + } \ + MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 }; + static const int s_length_extra[31] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 }; + static const int s_dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 }; + static const int s_dist_extra[32] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; + static const mz_uint8 s_length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; + mz_uint32 num_bits, dist, counter, num_extra; + tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const 
pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + /* Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). */ + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) + { + *pIn_buf_size = *pOut_buf_size = 0; + return TINFL_STATUS_BAD_PARAM; + } + + num_bits = r->m_num_bits; + bit_buf = r->m_bit_buf; + dist = r->m_dist; + counter = r->m_counter; + num_extra = r->m_num_extra; + dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; + r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); + TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + // TODO: fix warning C4334: '<<': result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?) 
+ counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) + { + TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); + } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); + r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) + { + if (num_bits) + TINFL_GET_BITS(6, r->m_raw_header[counter], 8); + else + TINFL_GET_BYTE(7, r->m_raw_header[counter]); + } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) + { + TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); + } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while (counter) + { + size_t n; + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); + } + while (pIn_buf_cur >= pIn_buf_end) + { + TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? 
TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); + pIn_buf_cur += n; + pOut_buf_cur += n; + counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; + mz_uint i; + r->m_table_sizes[0] = 288; + r->m_table_sizes[1] = 32; + TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for (i = 0; i <= 143; ++i) + *p++ = 8; + for (; i <= 255; ++i) + *p++ = 9; + for (; i <= 279; ++i) + *p++ = 7; + for (; i <= 287; ++i) + *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) + { + TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); + r->m_table_sizes[counter] += s_min_table_sizes[counter]; + } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); + for (counter = 0; counter < r->m_table_sizes[2]; counter++) + { + mz_uint s; + TINFL_GET_BITS(14, s, 3); + r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; + } + r->m_table_sizes[2] = 19; + } + for (; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; + tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; + pTable = &r->m_tables[r->m_type]; + MZ_CLEAR_OBJ(total_syms); + MZ_CLEAR_OBJ(pTable->m_look_up); + MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) + total_syms[pTable->m_code_size[i]]++; + used_syms = 0; total = 0; + next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) + { + used_syms += total_syms[i]; + next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); + } + if ((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + 
mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; + if (!code_size) + continue; + cur_code = next_code[code_size]++; + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) + { + mz_int16 k = (mz_int16)((code_size << 9) | sym_index); + while (rev_code < TINFL_FAST_LOOKUP_SIZE) + { + pTable->m_look_up[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) + { + pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) + { + pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + else + tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); + pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) + { + mz_uint s; + TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); + if (dist < 16) + { + r->m_len_codes[counter++] = (mz_uint8)dist; + continue; + } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; + TINFL_GET_BITS(18, s, num_extra); + s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); + counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); + TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for (;;) + { + mz_uint8 *pSrc; + for (;;) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; + mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 4; + num_bits += 32; + } +#else + if (num_bits < 15) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; + do + { + sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + counter = sym2; + bit_buf >>= code_len; + num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) + { + bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); + pIn_buf_cur += 2; + num_bits += 16; + } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; + do + { + sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; + } while (sym2 < 0); + } + bit_buf >>= code_len; + num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 
256) + { + pOut_buf_cur++; + counter = sym2; + break; + } + pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) + break; + + num_extra = s_length_extra[counter - 257]; + counter = s_length_base[counter - 257]; + if (num_extra) + { + mz_uint extra_bits; + TINFL_GET_BITS(25, extra_bits, num_extra); + counter += extra_bits; + } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; + dist = s_dist_base[dist]; + if (num_extra) + { + mz_uint extra_bits; + TINFL_GET_BITS(27, extra_bits, num_extra); + dist += extra_bits; + } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) + { + TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); + } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { +#ifdef MINIZ_UNALIGNED_USE_MEMCPY + memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32)*2); +#else + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; +#endif + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + while(counter>2) + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; + pSrc += 3; + counter -= 3; + } + 
if (counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + + /* Ensure byte alignment and put back any bytes from the bitbuf if we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ + /* I'm being super conservative here. A number of simplifications can be made to the byte alignment part, and the Adler32 check shouldn't ever need to worry about reading from the bitbuf now. */ + TINFL_SKIP_BITS(32, num_bits & 7); + while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) + { + --pIn_buf_cur; + num_bits -= 8; + } + bit_buf &= (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); + MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end of non-deflate/zlib streams with following data (such as gzip streams). */ + + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + for (counter = 0; counter < 4; ++counter) + { + mz_uint s; + if (num_bits) + TINFL_GET_BITS(41, s, 8); + else + TINFL_GET_BYTE(42, s); + r->m_z_adler32 = (r->m_z_adler32 << 8) | s; + } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + + TINFL_CR_FINISH + +common_exit: + /* As long as we aren't telling the caller that we NEED more input to make forward progress: */ + /* Put back any bytes from the bitbuf in case we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */ + /* We need to be very careful here to NOT push back any bytes we definitely know we need to make forward progress, though, or we'll lock the caller up into an inf loop. 
*/ + if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS)) + { + while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8)) + { + --pIn_buf_cur; + num_bits -= 8; + } + } + r->m_num_bits = num_bits; + r->m_bit_buf = bit_buf & (tinfl_bit_buf_t)((((mz_uint64)1) << num_bits) - (mz_uint64)1); + r->m_dist = dist; + r->m_counter = counter; + r->m_num_extra = num_extra; + r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; + *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; + size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; + size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0]; s2 += s1; + s1 += ptr[1]; s2 += s1; + s1 += ptr[2]; s2 += s1; + s1 += ptr[3]; s2 += s1; + s1 += ptr[4]; s2 += s1; + s1 += ptr[5]; s2 += s1; + s1 += ptr[6]; s2 += s1; + s1 += ptr[7]; s2 += s1; + } + for (; i < block_len; ++i) { + s1 += *ptr++; s2 += s1; + } + s1 %= 65521U; s2 %= 65521U; + buf_len -= block_len; + block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; + if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) + status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +/* Higher level helper functions. 
*/ +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; + void *pBuf = NULL, *pNew_buf; + size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for (;;) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) + break; + new_out_buf_capacity = out_buf_capacity * 2; + if (new_out_buf_capacity < 128) + new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); + *pOut_len = 0; + return NULL; + } + pBuf = pNew_buf; + out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; + tinfl_status status; + tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); + size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for (;;) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +#ifndef MINIZ_NO_MALLOC +tinfl_decompressor *tinfl_decompressor_alloc() +{ + tinfl_decompressor *pDecomp = (tinfl_decompressor *)MZ_MALLOC(sizeof(tinfl_decompressor)); + if (pDecomp) + tinfl_init(pDecomp); + return pDecomp; +} + +void tinfl_decompressor_free(tinfl_decompressor *pDecomp) +{ + MZ_FREE(pDecomp); +} +#endif + +#ifdef __cplusplus +} +#endif +/************************************************************************** + * + * Copyright 2013-2014 RAD Game Tools and Valve Software + * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC + * Copyright 2016 Martin Raiber + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- .ZIP archive reading */ + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include + +#if defined(_MSC_VER) || defined(__MINGW64__) +static FILE *mz_fopen(const char *pFilename, const char *pMode) +{ + FILE *pFile = NULL; + fopen_s(&pFile, pFilename, pMode); + return pFile; +} +static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) +{ + FILE *pFile = NULL; + if (freopen_s(&pFile, pPath, pMode, pStream)) + return NULL; + return pFile; +} +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN mz_fopen +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 _ftelli64 +#define MZ_FSEEK64 _fseeki64 +#define MZ_FILE_STAT_STRUCT _stat64 +#define MZ_FILE_STAT _stat64 +#define MZ_FFLUSH fflush +#define MZ_FREOPEN mz_freopen +#define MZ_DELETE_FILE remove +#elif defined(__MINGW32__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT _stat +#define MZ_FILE_STAT _stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__TINYC__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftell +#define MZ_FSEEK64 fseek +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN(f, m) fopen64(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD 
fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello64 +#define MZ_FSEEK64 fseeko64 +#define MZ_FILE_STAT_STRUCT stat64 +#define MZ_FILE_STAT stat64 +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) +#define MZ_DELETE_FILE remove +#elif defined(__APPLE__) +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#define MZ_FTELL64 ftello +#define MZ_FSEEK64 fseeko +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(p, m, s) freopen(p, m, s) +#define MZ_DELETE_FILE remove + +#else +#pragma message("Using fopen, ftello, fseeko, stat() etc. path for file I/O - this path may not support large files.") +#ifndef MINIZ_NO_TIME +#include +#endif +#define MZ_FOPEN(f, m) fopen(f, m) +#define MZ_FCLOSE fclose +#define MZ_FREAD fread +#define MZ_FWRITE fwrite +#ifdef __STRICT_ANSI__ +#define MZ_FTELL64 ftell +#define MZ_FSEEK64 fseek +#else +#define MZ_FTELL64 ftello +#define MZ_FSEEK64 fseeko +#endif +#define MZ_FILE_STAT_STRUCT stat +#define MZ_FILE_STAT stat +#define MZ_FFLUSH fflush +#define MZ_FREOPEN(f, m, s) freopen(f, m, s) +#define MZ_DELETE_FILE remove +#endif /* #ifdef _MSC_VER */ +#endif /* #ifdef MINIZ_NO_STDIO */ + +#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) + +/* Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. 
*/ +enum +{ + /* ZIP archive identifiers and record sizes */ + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, + MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, + MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, + + /* ZIP64 archive identifier and record sizes */ + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06064b50, + MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50, + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE = 56, + MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE = 20, + MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID = 0x0001, + MZ_ZIP_DATA_DESCRIPTOR_ID = 0x08074b50, + MZ_ZIP_DATA_DESCRIPTER_SIZE64 = 24, + MZ_ZIP_DATA_DESCRIPTER_SIZE32 = 16, + + /* Central directory header record offsets */ + MZ_ZIP_CDH_SIG_OFS = 0, + MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, + MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, + MZ_ZIP_CDH_BIT_FLAG_OFS = 8, + MZ_ZIP_CDH_METHOD_OFS = 10, + MZ_ZIP_CDH_FILE_TIME_OFS = 12, + MZ_ZIP_CDH_FILE_DATE_OFS = 14, + MZ_ZIP_CDH_CRC32_OFS = 16, + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, + MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, + MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, + MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, + MZ_ZIP_CDH_DISK_START_OFS = 34, + MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, + MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, + + /* Local directory header offsets */ + MZ_ZIP_LDH_SIG_OFS = 0, + MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, + MZ_ZIP_LDH_BIT_FLAG_OFS = 6, + MZ_ZIP_LDH_METHOD_OFS = 8, + MZ_ZIP_LDH_FILE_TIME_OFS = 10, + MZ_ZIP_LDH_FILE_DATE_OFS = 12, + MZ_ZIP_LDH_CRC32_OFS = 14, + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, + MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, + MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, + MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR = 1 << 3, + + /* End of central directory offsets */ + MZ_ZIP_ECDH_SIG_OFS = 0, + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, + 
MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, + MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, + MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, + MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, + + /* ZIP64 End of central directory locator offsets */ + MZ_ZIP64_ECDL_SIG_OFS = 0, /* 4 bytes */ + MZ_ZIP64_ECDL_NUM_DISK_CDIR_OFS = 4, /* 4 bytes */ + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS = 8, /* 8 bytes */ + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS = 16, /* 4 bytes */ + + /* ZIP64 End of central directory header offsets */ + MZ_ZIP64_ECDH_SIG_OFS = 0, /* 4 bytes */ + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS = 4, /* 8 bytes */ + MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS = 12, /* 2 bytes */ + MZ_ZIP64_ECDH_VERSION_NEEDED_OFS = 14, /* 2 bytes */ + MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS = 16, /* 4 bytes */ + MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS = 20, /* 4 bytes */ + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 24, /* 8 bytes */ + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS = 32, /* 8 bytes */ + MZ_ZIP64_ECDH_CDIR_SIZE_OFS = 40, /* 8 bytes */ + MZ_ZIP64_ECDH_CDIR_OFS_OFS = 48, /* 8 bytes */ + MZ_ZIP_VERSION_MADE_BY_DOS_FILESYSTEM_ID = 0, + MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG = 0x10, + MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED = 1, + MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG = 32, + MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION = 64, + MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED = 8192, + MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 = 1 << 11 +}; + +typedef struct +{ + void *m_p; + size_t m_size, m_capacity; + mz_uint m_element_size; +} mz_zip_array; + +struct mz_zip_internal_state_tag +{ + mz_zip_array m_central_dir; + mz_zip_array m_central_dir_offsets; + mz_zip_array m_sorted_central_dir_offsets; + + /* The flags passed in when the archive is initially opened. */ + uint32_t m_init_flags; + + /* MZ_TRUE if the archive has a zip64 end of central directory headers, etc. 
*/ + mz_bool m_zip64; + + /* MZ_TRUE if we found zip64 extended info in the central directory (m_zip64 will also be slammed to true too, even if we didn't find a zip64 end of central dir header, etc.) */ + mz_bool m_zip64_has_extended_info_fields; + + /* These fields are used by the file, FILE, memory, and memory/heap read/write helpers. */ + MZ_FILE *m_pFile; + mz_uint64 m_file_archive_start_ofs; + + void *m_pMem; + size_t m_mem_size; + size_t m_mem_capacity; +}; + +#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size + +#if defined(DEBUG) || defined(_DEBUG) || defined(NDEBUG) +static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index) +{ + (void)pArray; + MZ_ASSERT(index < pArray->m_size); + return index; +} +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[mz_zip_array_range_check(array_ptr, index)] +#else +#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index] +#endif + +static MZ_FORCEINLINE void mz_zip_array_init(mz_zip_array *pArray, mz_uint32 element_size) +{ + memset(pArray, 0, sizeof(mz_zip_array)); + pArray->m_element_size = element_size; +} + +static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray) +{ + pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); + memset(pArray, 0, sizeof(mz_zip_array)); +} + +static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing) +{ + void *pNew_p; + size_t new_capacity = min_new_capacity; + MZ_ASSERT(pArray->m_element_size); + if (pArray->m_capacity >= min_new_capacity) + return MZ_TRUE; + if (growing) + { + new_capacity = MZ_MAX(1, pArray->m_capacity); + while (new_capacity < min_new_capacity) + new_capacity *= 2; + } + if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, 
new_capacity))) + return MZ_FALSE; + pArray->m_p = pNew_p; + pArray->m_capacity = new_capacity; + return MZ_TRUE; +} + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS +static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing) +{ + if (new_capacity > pArray->m_capacity) + { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) + return MZ_FALSE; + } + return MZ_TRUE; +} +#endif + +static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing) +{ + if (new_size > pArray->m_capacity) + { + if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) + return MZ_FALSE; + } + pArray->m_size = new_size; + return MZ_TRUE; +} + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS +static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n) +{ + return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); +} + +static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n) +{ + size_t orig_size = pArray->m_size; + if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) + return MZ_FALSE; + if (n > 0) + memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); + return MZ_TRUE; +} +#endif + +#ifndef MINIZ_NO_TIME +static MZ_TIME_T mz_zip_dos_to_time_t(int dos_time, int dos_date) +{ + struct tm tm; + memset(&tm, 0, sizeof(tm)); + tm.tm_isdst = -1; + tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; + tm.tm_mon = ((dos_date >> 5) & 15) - 1; + tm.tm_mday = dos_date & 31; + tm.tm_hour = (dos_time >> 11) & 31; + tm.tm_min = (dos_time >> 5) & 63; + tm.tm_sec = (dos_time << 1) & 62; + return mktime(&tm); +} + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS +static void mz_zip_time_t_to_dos_time(MZ_TIME_T time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date) +{ +#ifdef 
_MSC_VER + struct tm tm_struct; + struct tm *tm = &tm_struct; + errno_t err = localtime_s(tm, &time); + if (err) + { + *pDOS_date = 0; + *pDOS_time = 0; + return; + } +#else + struct tm *tm = localtime(&time); +#endif /* #ifdef _MSC_VER */ + + *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); + *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); +} +#endif /* MINIZ_NO_ARCHIVE_WRITING_APIS */ + +#ifndef MINIZ_NO_STDIO +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS +static mz_bool mz_zip_get_file_modified_time(const char *pFilename, MZ_TIME_T *pTime) +{ + struct MZ_FILE_STAT_STRUCT file_stat; + + /* On Linux with x86 glibc, this call will fail on large files (I think >= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. */ + if (MZ_FILE_STAT(pFilename, &file_stat) != 0) + return MZ_FALSE; + + *pTime = file_stat.st_mtime; + + return MZ_TRUE; +} +#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS*/ + +static mz_bool mz_zip_set_file_times(const char *pFilename, MZ_TIME_T access_time, MZ_TIME_T modified_time) +{ + struct utimbuf t; + + memset(&t, 0, sizeof(t)); + t.actime = access_time; + t.modtime = modified_time; + + return !utime(pFilename, &t); +} +#endif /* #ifndef MINIZ_NO_STDIO */ +#endif /* #ifndef MINIZ_NO_TIME */ + +static MZ_FORCEINLINE mz_bool mz_zip_set_error(mz_zip_archive *pZip, mz_zip_error err_num) +{ + if (pZip) + pZip->m_last_error = err_num; + return MZ_FALSE; +} + +static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags) +{ + (void)flags; + if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!pZip->m_pAlloc) + pZip->m_pAlloc = miniz_def_alloc_func; + if (!pZip->m_pFree) + pZip->m_pFree = miniz_def_free_func; + if (!pZip->m_pRealloc) + pZip->m_pRealloc = miniz_def_realloc_func; + + pZip->m_archive_size = 0; + 
pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + pZip->m_last_error = MZ_ZIP_NO_ERROR; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + pZip->m_pState->m_init_flags = flags; + pZip->m_pState->m_zip64 = MZ_FALSE; + pZip->m_pState->m_zip64_has_extended_info_fields = MZ_FALSE; + + pZip->m_zip_mode = MZ_ZIP_MODE_READING; + + return MZ_TRUE; +} + +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip) +{ + // these aren't offsets, it's a sorted array of the file index elements + return (const mz_uint32*)(pZip->m_pState->m_sorted_central_dir_offsets.m_p); +} + + +static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; + pR++; + } + return (pL == pE) ? 
(l_len < r_len) : (l < r); +} + +#define MZ_SWAP_UINT32(a, b) \ + do \ + { \ + mz_uint32 t = a; \ + a = b; \ + b = t; \ + } \ + MZ_MACRO_END + +/* Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) */ +static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices; + mz_uint32 start, end; + const mz_uint32 size = pZip->m_total_files; + + if (size <= 1U) + return; + + pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + + start = (size - 2U) >> 1U; + for (;;) + { + mz_uint64 child, root = start; + for (;;) + { + if ((child = (root << 1U) + 1U) >= size) + break; + child += (((child + 1U) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U]))); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + if (!start) + break; + start--; + } + + end = size - 1; + while (end > 0) + { + mz_uint64 child, root = 0; + MZ_SWAP_UINT32(pIndices[end], pIndices[0]); + for (;;) + { + if ((child = (root << 1U) + 1U) >= end) + break; + child += (((child + 1U) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U])); + if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) + break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); + root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_locate_header_sig(mz_zip_archive *pZip, mz_uint32 record_sig, mz_uint32 record_size, mz_int64 *pOfs) +{ + 
mz_int64 cur_file_ofs; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; + mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + + /* Basic sanity checks - reject files which are too small */ + if (pZip->m_archive_size < record_size) + return MZ_FALSE; + + /* Find the record by scanning the file from the end towards the beginning. */ + cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for (;;) + { + int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + + for (i = n - 4; i >= 0; --i) + { + mz_uint s = MZ_READ_LE32(pBuf + i); + if (s == record_sig) + { + if ((pZip->m_archive_size - (cur_file_ofs + i)) >= record_size) + break; + } + } + + if (i >= 0) + { + cur_file_ofs += i; + break; + } + + /* Give up if we've searched the entire file, or we've gone back "too far" (~64kb) */ + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (MZ_UINT16_MAX + record_size))) + return MZ_FALSE; + + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + + *pOfs = cur_file_ofs; + return MZ_TRUE; +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint flags) +{ + mz_uint cdir_size = 0, cdir_entries_on_this_disk = 0, num_this_disk = 0, cdir_disk_index = 0; + mz_uint64 cdir_ofs = 0; + mz_int64 cur_file_ofs = 0; + const mz_uint8 *p; + + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; + mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + mz_uint32 zip64_end_of_central_dir_locator_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pZip64_locator = (mz_uint8 *)zip64_end_of_central_dir_locator_u32; + + mz_uint32 zip64_end_of_central_dir_header_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pZip64_end_of_central_dir 
= (mz_uint8 *)zip64_end_of_central_dir_header_u32; + + mz_uint64 zip64_end_of_central_dir_ofs = 0; + + /* Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. */ + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + + if (!mz_zip_reader_locate_header_sig(pZip, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE, &cur_file_ofs)) + return mz_zip_set_error(pZip, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR); + + /* Read and verify the end of central directory record. */ + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + if (MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + + if (cur_file_ofs >= (MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) + { + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE, pZip64_locator, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) + { + if (MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG) + { + zip64_end_of_central_dir_ofs = MZ_READ_LE64(pZip64_locator + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS); + if (zip64_end_of_central_dir_ofs > (pZip->m_archive_size - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)) + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + + if (pZip->m_pRead(pZip->m_pIO_opaque, zip64_end_of_central_dir_ofs, pZip64_end_of_central_dir, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) + { + if (MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG) + { + 
pZip->m_pState->m_zip64 = MZ_TRUE; + } + } + } + } + } + + pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS); + cdir_entries_on_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); + num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); + cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); + cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS); + cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); + + if (pZip->m_pState->m_zip64) + { + mz_uint32 zip64_total_num_of_disks = MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS); + mz_uint64 zip64_cdir_total_entries = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS); + mz_uint64 zip64_cdir_total_entries_on_this_disk = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS); + mz_uint64 zip64_size_of_end_of_central_dir_record = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS); + mz_uint64 zip64_size_of_central_directory = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_SIZE_OFS); + + if (zip64_size_of_end_of_central_dir_record < (MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - 12)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if (zip64_total_num_of_disks != 1U) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); + + /* Check for miniz's practical limits */ + if (zip64_cdir_total_entries > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + + pZip->m_total_files = (mz_uint32)zip64_cdir_total_entries; + + if (zip64_cdir_total_entries_on_this_disk > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + + cdir_entries_on_this_disk = (mz_uint32)zip64_cdir_total_entries_on_this_disk; + + /* Check for miniz's current practical limits (sorry, this should be enough for millions of files) */ + if (zip64_size_of_central_directory > MZ_UINT32_MAX) + return 
mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + + cdir_size = (mz_uint32)zip64_size_of_central_directory; + + num_this_disk = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS); + + cdir_disk_index = MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS); + + cdir_ofs = MZ_READ_LE64(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_CDIR_OFS_OFS); + } + + if (pZip->m_total_files != cdir_entries_on_this_disk) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); + + if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); + + if (cdir_size < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + pZip->m_central_directory_file_ofs = cdir_ofs; + + if (pZip->m_total_files) + { + mz_uint i, n; + /* Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and possibly another to hold the sorted indices. 
*/ + if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || + (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + if (sort_central_dir) + { + if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + /* Now create an index into the central directory file records, do some basic sanity checking on each record */ + p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; + for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) + { + mz_uint total_header_size, disk_index, bit_flags, filename_size, ext_data_size; + mz_uint64 comp_size, decomp_size, local_header_ofs; + + if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); + + if (sort_central_dir) + MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + filename_size = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + ext_data_size = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); + + if ((!pZip->m_pState->m_zip64_has_extended_info_fields) && + (ext_data_size) && + (MZ_MAX(MZ_MAX(comp_size, decomp_size), local_header_ofs) == MZ_UINT32_MAX)) + { + /* Attempt to find zip64 extended information field in the entry's 
extra data */ + mz_uint32 extra_size_remaining = ext_data_size; + + if (extra_size_remaining) + { + const mz_uint8 *pExtra_data; + void* buf = NULL; + + if (MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + ext_data_size > n) + { + buf = MZ_MALLOC(ext_data_size); + if(buf==NULL) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size, buf, ext_data_size) != ext_data_size) + { + MZ_FREE(buf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + + pExtra_data = (mz_uint8*)buf; + } + else + { + pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size; + } + + do + { + mz_uint32 field_id; + mz_uint32 field_data_size; + + if (extra_size_remaining < (sizeof(mz_uint16) * 2)) + { + MZ_FREE(buf); + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + field_id = MZ_READ_LE16(pExtra_data); + field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); + + if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) + { + MZ_FREE(buf); + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) + { + /* Ok, the archive didn't have any zip64 headers but it uses a zip64 extended information field so mark it as zip64 anyway (this can occur with infozip's zip util when it reads compresses files from stdin). 
*/ + pZip->m_pState->m_zip64 = MZ_TRUE; + pZip->m_pState->m_zip64_has_extended_info_fields = MZ_TRUE; + break; + } + + pExtra_data += sizeof(mz_uint16) * 2 + field_data_size; + extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; + } while (extra_size_remaining); + + MZ_FREE(buf); + } + } + + /* I've seen archives that aren't marked as zip64 that uses zip64 ext data, argh */ + if ((comp_size != MZ_UINT32_MAX) && (decomp_size != MZ_UINT32_MAX)) + { + if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); + if ((disk_index == MZ_UINT16_MAX) || ((disk_index != num_this_disk) && (disk_index != 1))) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_MULTIDISK); + + if (comp_size != MZ_UINT32_MAX) + { + if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + bit_flags = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + if (bit_flags & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + + if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + n -= total_header_size; + p += total_header_size; + } + } + + if (sort_central_dir) + mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); + + return MZ_TRUE; +} + +void mz_zip_zero_struct(mz_zip_archive *pZip) +{ + if (pZip) + MZ_CLEAR_OBJ(*pZip); +} + +static mz_bool mz_zip_reader_end_internal(mz_zip_archive *pZip, mz_bool set_last_error) +{ + mz_bool status = MZ_TRUE; + + if 
(!pZip) + return MZ_FALSE; + + if ((!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + { + if (set_last_error) + pZip->m_last_error = MZ_ZIP_INVALID_PARAMETER; + + return MZ_FALSE; + } + + if (pZip->m_pState) + { + mz_zip_internal_state *pState = pZip->m_pState; + pZip->m_pState = NULL; + + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) + { + if (MZ_FCLOSE(pState->m_pFile) == EOF) + { + if (set_last_error) + pZip->m_last_error = MZ_ZIP_FILE_CLOSE_FAILED; + status = MZ_FALSE; + } + } + pState->m_pFile = NULL; + } +#endif /* #ifndef MINIZ_NO_STDIO */ + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + } + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + + return status; +} + +mz_bool mz_zip_reader_end(mz_zip_archive *pZip) +{ + return mz_zip_reader_end_internal(pZip, MZ_TRUE); +} +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags) +{ + if ((!pZip) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + + pZip->m_zip_type = MZ_ZIP_TYPE_USER; + pZip->m_archive_size = size; + + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end_internal(pZip, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) ? 
0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags) +{ + if (!pMem) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + + pZip->m_zip_type = MZ_ZIP_TYPE_MEMORY; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pNeeds_keepalive = NULL; + +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + + pZip->m_pState->m_mem_size = size; + + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end_internal(pZip, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + + file_ofs += pZip->m_pState->m_file_archive_start_ofs; + + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) +{ + return mz_zip_reader_init_file_v2(pZip, pFilename, flags, 0, 0); +} + +mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size) +{ + mz_uint64 file_size; + MZ_FILE *pFile; + + if ((!pZip) || (!pFilename) || ((archive_size) && (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return 
mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + + file_size = archive_size; + if (!file_size) + { + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + { + MZ_FCLOSE(pFile); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); + } + + file_size = MZ_FTELL64(pFile); + } + + /* TODO: Better sanity check archive_size and the # of actual remaining bytes */ + + if (file_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + { + MZ_FCLOSE(pFile); + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + } + + if (!mz_zip_reader_init_internal(pZip, flags)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + + pZip->m_zip_type = MZ_ZIP_TYPE_FILE; + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + pZip->m_pState->m_file_archive_start_ofs = file_start_ofs; + + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end_internal(pZip, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags) +{ + mz_uint64 cur_file_ofs; + + if ((!pZip) || (!pFile)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + + cur_file_ofs = MZ_FTELL64(pFile); + + if (!archive_size) + { + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); + + archive_size = MZ_FTELL64(pFile) - cur_file_ofs; + + if (archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE); + } + + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + + pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; + pZip->m_pRead = mz_zip_file_read_func; + + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = archive_size; + pZip->m_pState->m_file_archive_start_ofs = cur_file_ofs; + + if 
(!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end_internal(pZip, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +#endif /* #ifndef MINIZ_NO_STDIO */ + +static MZ_FORCEINLINE const mz_uint8 *mz_zip_get_cdh(mz_zip_archive *pZip, mz_uint file_index) +{ + if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files)) + return NULL; + return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); +} + +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint m_bit_flag; + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + if (!p) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return MZ_FALSE; + } + + m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + return (m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) != 0; +} + +mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint bit_flag; + mz_uint method; + + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + if (!p) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return MZ_FALSE; + } + + method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); + bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + + if ((method != 0) && (method != MZ_DEFLATED)) + { + mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); + return MZ_FALSE; + } + + if (bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION)) + { + mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + return MZ_FALSE; + } + + if (bit_flag & MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG) + { + mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_uint filename_len, 
attribute_mapping_id, external_attr; + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + if (!p) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return MZ_FALSE; + } + + filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_len) + { + if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') + return MZ_TRUE; + } + + /* Bugfix: This code was also checking if the internal attribute was non-zero, which wasn't correct. */ + /* Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field. */ + /* FIXME: Remove this check? Is it necessary - we already check the filename. */ + attribute_mapping_id = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS) >> 8; + (void)attribute_mapping_id; + + external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + if ((external_attr & MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG) != 0) + { + return MZ_TRUE; + } + + return MZ_FALSE; +} + +static mz_bool mz_zip_file_stat_internal(mz_zip_archive *pZip, mz_uint file_index, const mz_uint8 *pCentral_dir_header, mz_zip_archive_file_stat *pStat, mz_bool *pFound_zip64_extra_data) +{ + mz_uint n; + const mz_uint8 *p = pCentral_dir_header; + + if (pFound_zip64_extra_data) + *pFound_zip64_extra_data = MZ_FALSE; + + if ((!p) || (!pStat)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + /* Extract fields from the central directory record. 
*/ + pStat->m_file_index = file_index; + pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); + pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); + pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); + pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); + pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); +#ifndef MINIZ_NO_TIME + pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); +#endif + pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); + pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); + pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); + pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + + /* Copy as much of the filename and comment as possible. */ + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); + memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pStat->m_filename[n] = '\0'; + + n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); + n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); + pStat->m_comment_size = n; + memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); + pStat->m_comment[n] = '\0'; + + /* Set some flags for convienance */ + pStat->m_is_directory = mz_zip_reader_is_file_a_directory(pZip, file_index); + pStat->m_is_encrypted = mz_zip_reader_is_file_encrypted(pZip, file_index); + pStat->m_is_supported = mz_zip_reader_is_file_supported(pZip, file_index); + + /* See if we need to read any zip64 extended information fields. 
*/ + /* Confusingly, these zip64 fields can be present even on non-zip64 archives (Debian zip on a huge files from stdin piped to stdout creates them). */ + if (MZ_MAX(MZ_MAX(pStat->m_comp_size, pStat->m_uncomp_size), pStat->m_local_header_ofs) == MZ_UINT32_MAX) + { + /* Attempt to find zip64 extended information field in the entry's extra data */ + mz_uint32 extra_size_remaining = MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS); + + if (extra_size_remaining) + { + const mz_uint8 *pExtra_data = p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + + do + { + mz_uint32 field_id; + mz_uint32 field_data_size; + + if (extra_size_remaining < (sizeof(mz_uint16) * 2)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + field_id = MZ_READ_LE16(pExtra_data); + field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); + + if ((field_data_size + sizeof(mz_uint16) * 2) > extra_size_remaining) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) + { + const mz_uint8 *pField_data = pExtra_data + sizeof(mz_uint16) * 2; + mz_uint32 field_data_remaining = field_data_size; + + if (pFound_zip64_extra_data) + *pFound_zip64_extra_data = MZ_TRUE; + + if (pStat->m_uncomp_size == MZ_UINT32_MAX) + { + if (field_data_remaining < sizeof(mz_uint64)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + pStat->m_uncomp_size = MZ_READ_LE64(pField_data); + pField_data += sizeof(mz_uint64); + field_data_remaining -= sizeof(mz_uint64); + } + + if (pStat->m_comp_size == MZ_UINT32_MAX) + { + if (field_data_remaining < sizeof(mz_uint64)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + pStat->m_comp_size = MZ_READ_LE64(pField_data); + pField_data += sizeof(mz_uint64); + field_data_remaining -= sizeof(mz_uint64); + } + + if (pStat->m_local_header_ofs == MZ_UINT32_MAX) + { + if (field_data_remaining < sizeof(mz_uint64)) + 
return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + pStat->m_local_header_ofs = MZ_READ_LE64(pField_data); + pField_data += sizeof(mz_uint64); + field_data_remaining -= sizeof(mz_uint64); + } + + break; + } + + pExtra_data += sizeof(mz_uint16) * 2 + field_data_size; + extra_size_remaining = extra_size_remaining - sizeof(mz_uint16) * 2 - field_data_size; + } while (extra_size_remaining); + } + } + + return MZ_TRUE; +} + +static MZ_FORCEINLINE mz_bool mz_zip_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags) +{ + mz_uint i; + if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) + return 0 == memcmp(pA, pB, len); + for (i = 0; i < len; ++i) + if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) + return MZ_FALSE; + return MZ_TRUE; +} + +static MZ_FORCEINLINE int mz_zip_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len) +{ + const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; + mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); + mz_uint8 l = 0, r = 0; + pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + pE = pL + MZ_MIN(l_len, r_len); + while (pL < pE) + { + if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) + break; + pL++; + pR++; + } + return (pL == pE) ? 
(int)(l_len - r_len) : (l - r); +} + +static mz_bool mz_zip_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename, mz_uint32 *pIndex) +{ + mz_zip_internal_state *pState = pZip->m_pState; + const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; + const mz_zip_array *pCentral_dir = &pState->m_central_dir; + mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); + const uint32_t size = pZip->m_total_files; + const mz_uint filename_len = (mz_uint)strlen(pFilename); + + if (pIndex) + *pIndex = 0; + + if (size) + { + /* yes I could use uint32_t's, but then we would have to add some special case checks in the loop, argh, and */ + /* honestly the major expense here on 32-bit CPU's will still be the filename compare */ + mz_int64 l = 0, h = (mz_int64)size - 1; + + while (l <= h) + { + mz_int64 m = l + ((h - l) >> 1); + uint32_t file_index = pIndices[(uint32_t)m]; + + int comp = mz_zip_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); + if (!comp) + { + if (pIndex) + *pIndex = file_index; + return MZ_TRUE; + } + else if (comp < 0) + l = m + 1; + else + h = m - 1; + } + } + + return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); +} + +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags) +{ + mz_uint32 index; + if (!mz_zip_reader_locate_file_v2(pZip, pName, pComment, flags, &index)) + return -1; + else + return (int)index; +} + +mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *pIndex) +{ + mz_uint file_index; + size_t name_len, comment_len; + + if (pIndex) + *pIndex = 0; + + if ((!pZip) || (!pZip->m_pState) || (!pName)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + /* See if we can use a binary search */ + if (((pZip->m_pState->m_init_flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) && + (pZip->m_zip_mode 
== MZ_ZIP_MODE_READING) && + ((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) + { + return mz_zip_locate_file_binary_search(pZip, pName, pIndex); + } + + /* Locate the entry by scanning the entire central directory */ + name_len = strlen(pName); + if (name_len > MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + comment_len = pComment ? strlen(pComment) : 0; + if (comment_len > MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + for (file_index = 0; file_index < pZip->m_total_files; file_index++) + { + const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); + mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); + const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; + if (filename_len < name_len) + continue; + if (comment_len) + { + mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); + const char *pFile_comment = pFilename + filename_len + file_extra_len; + if ((file_comment_len != comment_len) || (!mz_zip_string_equal(pComment, pFile_comment, file_comment_len, flags))) + continue; + } + if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) + { + int ofs = filename_len - 1; + do + { + if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) + break; + } while (--ofs >= 0); + ofs++; + pFilename += ofs; + filename_len -= ofs; + } + if ((filename_len == name_len) && (mz_zip_string_equal(pName, pFilename, filename_len, flags))) + { + if (pIndex) + *pIndex = file_index; + return MZ_TRUE; + } + } + + return mz_zip_set_error(pZip, MZ_ZIP_FILE_NOT_FOUND); +} + +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint 
file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + int status = TINFL_STATUS_DONE; + mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + tinfl_decompressor inflator; + + if ((!pZip) || (!pZip->m_pState) || ((buf_size) && (!pBuf)) || ((user_read_buf_size) && (!pUser_read_buf)) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + /* A directory or zero length file */ + if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) + return MZ_TRUE; + + /* Encryption and patch files are not supported. */ + if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + + /* This function only supports decompressing stored and deflate. */ + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); + + /* Ensure supplied output buffer is large enough. */ + needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; + if (buf_size < needed_size) + return mz_zip_set_error(pZip, MZ_ZIP_BUF_TOO_SMALL); + + /* Read and parse the local directory entry. 
*/ + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + /* The file is stored or the caller has requested the compressed data. */ + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) == 0) + { + if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32) + return mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); + } +#endif + + return MZ_TRUE; + } + + /* Decompress the file either directly from memory or from a file input buffer. */ + tinfl_init(&inflator); + + if (pZip->m_pState->m_pMem) + { + /* Read directly from the archive in memory. */ + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else if (pUser_read_buf) + { + /* Use a user provided read buffer. */ + if (!user_read_buf_size) + return MZ_FALSE; + pRead_buf = (mz_uint8 *)pUser_read_buf; + read_buf_size = user_read_buf_size; + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + else + { + /* Temporarily allocate a read buffer. 
*/ + read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE); + if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + do + { + /* The size_t cast here should be OK because we've verified that the output buffer is >= file_stat.m_uncomp_size above */ + size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + status = TINFL_STATUS_FAILED; + mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + out_buf_ofs += out_buf_size; + } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); + + if (status == TINFL_STATUS_DONE) + { + /* Make sure the entire file was decompressed, and check its CRC. 
*/ + if (out_buf_ofs != file_stat.m_uncomp_size) + { + mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); + status = TINFL_STATUS_FAILED; + } +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + else if (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32) + { + mz_zip_set_error(pZip, MZ_ZIP_CRC_CHECK_FAILED); + status = TINFL_STATUS_FAILED; + } +#endif + } + + if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) +{ + mz_uint32 file_index; + if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) + return MZ_FALSE; + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); +} + +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); +} + +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags) +{ + return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); +} + +/* +void *mz_zip_reader_extract_file_uncompressed(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) +{ + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) + *pSize = 0; + + if (!p) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return NULL; + } + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = 
(flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; + + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) + { + mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + return NULL; + } + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + return NULL; + } + + *pSize = alloc_size; + return +} +*/ + +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags) +{ + + mz_uint64 comp_size, uncomp_size, alloc_size; + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + void *pBuf; + + if (pSize) + *pSize = 0; + + if (!p) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return NULL; + } + + comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); + + alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; + if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) + { + mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + return NULL; + } + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + return NULL; + } + + if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return NULL; + } + + if (pSize) + *pSize = (size_t)alloc_size; + return pBuf; + + +} + +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags) +{ + mz_uint32 file_index; + if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) + { + if (pSize) + *pSize = 0; + return MZ_FALSE; + } + return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); +} + +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, 
mz_uint flags) +{ + int status = TINFL_STATUS_DONE; +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + mz_uint file_crc32 = MZ_CRC32_INIT; +#endif + mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf = NULL; + void *pWrite_buf = NULL; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + if ((!pZip) || (!pZip->m_pState) || (!pCallback) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + /* A directory or zero length file */ + if ((file_stat.m_is_directory) || (!file_stat.m_comp_size)) + return MZ_TRUE; + + /* Encryption and patch files are not supported. */ + if (file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + + /* This function only supports decompressing stored and deflate. 
*/ + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); + + /* Read and do some minimal validation of the local directory entry (this doesn't crack the zip64 stuff, which we already have from the central dir) */ + cur_file_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + /* Decompress the file either directly from memory or from a file input buffer. */ + if (pZip->m_pState->m_pMem) + { + pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; + read_buf_size = read_buf_avail = file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + read_buf_size = MZ_MIN(file_stat.m_comp_size, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + read_buf_avail = 0; + comp_remaining = file_stat.m_comp_size; + } + + if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) + { + /* The file is stored or the caller has requested the compressed data. 
*/ + if (pZip->m_pState->m_pMem) + { + if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > MZ_UINT32_MAX)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) + { + mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); + status = TINFL_STATUS_FAILED; + } + else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size); +#endif + } + + cur_file_ofs += file_stat.m_comp_size; + out_buf_ofs += file_stat.m_comp_size; + comp_remaining = 0; + } + else + { + while (comp_remaining) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + status = TINFL_STATUS_FAILED; + break; + } + +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); + } +#endif + + if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); + status = TINFL_STATUS_FAILED; + break; + } + + cur_file_ofs += read_buf_avail; + out_buf_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + } + } + } + else + { + tinfl_decompressor inflator; + tinfl_init(&inflator); + + if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + status = TINFL_STATUS_FAILED; + } + else + { + do + { + mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & 
(TINFL_LZ_DICT_SIZE - 1)); + if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) + { + read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + status = TINFL_STATUS_FAILED; + break; + } + cur_file_ofs += read_buf_avail; + comp_remaining -= read_buf_avail; + read_buf_ofs = 0; + } + + in_buf_size = (size_t)read_buf_avail; + status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + read_buf_avail -= in_buf_size; + read_buf_ofs += in_buf_size; + + if (out_buf_size) + { + if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size) + { + mz_zip_set_error(pZip, MZ_ZIP_WRITE_CALLBACK_FAILED); + status = TINFL_STATUS_FAILED; + break; + } + +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); +#endif + if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) + { + mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); + status = TINFL_STATUS_FAILED; + break; + } + } + } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); + } + } + + if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + { + /* Make sure the entire file was decompressed, and check its CRC. 
*/ + if (out_buf_ofs != file_stat.m_uncomp_size) + { + mz_zip_set_error(pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); + status = TINFL_STATUS_FAILED; + } +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + else if (file_crc32 != file_stat.m_crc32) + { + mz_zip_set_error(pZip, MZ_ZIP_DECOMPRESSION_FAILED); + status = TINFL_STATUS_FAILED; + } +#endif + } + + if (!pZip->m_pState->m_pMem) + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + + if (pWrite_buf) + pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); + + return status == TINFL_STATUS_DONE; +} + +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags) +{ + mz_uint32 file_index; + if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) + return MZ_FALSE; + + return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); +} + +size_t mz_zip_reader_get_raw_data_offset(mz_zip_archive *pZip, mz_uint file_index) +{ + mz_zip_archive_file_stat file_stat; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + // just so can read the data in debugger + //mz_local_file_header* header = (mz_local_file_header*)&local_header_u32; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return 0; + + // TODO: for now assume mmap only case + const mz_uint8* pMem = (const mz_uint8 *)pZip->m_pState->m_pMem; + + // DONE: lookup the file_stat from the index, and do we need to setup fileIO to read the header + // to then find out the filename length to then add to the offset to get to the file data. 
+ + mz_uint64 cur_file_ofs = file_stat.m_local_header_ofs; + + cur_file_ofs += pZip->m_pState->m_file_archive_start_ofs; + + memcpy(pLocal_header, pMem + cur_file_ofs, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + + // all local headers start with same signature + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return 0; + + // advance past filename and extra length data + mz_uint64 headerOffset = MZ_ZIP_LOCAL_DIR_HEADER_SIZE + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + + cur_file_ofs += headerOffset; + + //if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) + // return 0; + + return cur_file_ofs; +} + +const uint8_t* mz_zip_reader_get_raw_data(mz_zip_archive *pZip, mz_uint file_index) +{ + size_t offset = mz_zip_reader_get_raw_data_offset(pZip, file_index); + if (offset == 0) + return NULL; + + const mz_uint8* pMem = (const mz_uint8 *)pZip->m_pState->m_pMem; + return pMem + offset; +} + +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags) +{ + mz_zip_reader_extract_iter_state *pState; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + + /* Argument sanity check */ + if ((!pZip) || (!pZip->m_pState)) + return NULL; + + /* Allocate an iterator status structure */ + pState = (mz_zip_reader_extract_iter_state*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_reader_extract_iter_state)); + if (!pState) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + return NULL; + } + + /* Fetch file details */ + if (!mz_zip_reader_file_stat(pZip, file_index, &pState->file_stat)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + /* Encryption and patch files are not supported. 
*/ + if (pState->file_stat.m_bit_flag & (MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION | MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG)) + { + mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + /* This function only supports decompressing stored and deflate. */ + if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (pState->file_stat.m_method != 0) && (pState->file_stat.m_method != MZ_DEFLATED)) + { + mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + /* Init state - save args */ + pState->pZip = pZip; + pState->flags = flags; + + /* Init state - reset variables to defaults */ + pState->status = TINFL_STATUS_DONE; +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + pState->file_crc32 = MZ_CRC32_INIT; +#endif + pState->read_buf_ofs = 0; + pState->out_buf_ofs = 0; + pState->pRead_buf = NULL; + pState->pWrite_buf = NULL; + pState->out_blk_remain = 0; + + /* Read and parse the local directory entry. 
*/ + pState->cur_file_ofs = pState->file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, pState->cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + pState->cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + if ((pState->cur_file_ofs + pState->file_stat.m_comp_size) > pZip->m_archive_size) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + + /* Decompress the file either directly from memory or from a file input buffer. 
*/ + if (pZip->m_pState->m_pMem) + { + pState->pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + pState->cur_file_ofs; + pState->read_buf_size = pState->read_buf_avail = pState->file_stat.m_comp_size; + pState->comp_remaining = pState->file_stat.m_comp_size; + } + else + { + if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))) + { + /* Decompression required, therefore intermediate read buffer required */ + pState->read_buf_size = MZ_MIN(pState->file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); + if (NULL == (pState->pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)pState->read_buf_size))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + } + else + { + /* Decompression not required - we will be reading directly into user buffer, no temp buf required */ + pState->read_buf_size = 0; + } + pState->read_buf_avail = 0; + pState->comp_remaining = pState->file_stat.m_comp_size; + } + + if (!((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method))) + { + /* Decompression required, init decompressor */ + tinfl_init( &pState->inflator ); + + /* Allocate write buffer */ + if (NULL == (pState->pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + if (pState->pRead_buf) + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->pRead_buf); + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + return NULL; + } + } + + return pState; +} + +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags) +{ + mz_uint32 file_index; + + /* Locate file index by name */ + if (!mz_zip_reader_locate_file_v2(pZip, pFilename, NULL, flags, &file_index)) + return NULL; + + /* Construct iterator */ + return mz_zip_reader_extract_iter_new(pZip, file_index, flags); +} + +size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, 
void* pvBuf, size_t buf_size) +{ + size_t copied_to_caller = 0; + + /* Argument sanity check */ + if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState) || (!pvBuf)) + return 0; + + if ((pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!pState->file_stat.m_method)) + { + /* The file is stored or the caller has requested the compressed data, calc amount to return. */ + copied_to_caller = (size_t)MZ_MIN( buf_size, pState->comp_remaining ); + + /* Zip is in memory....or requires reading from a file? */ + if (pState->pZip->m_pState->m_pMem) + { + /* Copy data to caller's buffer */ + memcpy( pvBuf, pState->pRead_buf, copied_to_caller ); + pState->pRead_buf = ((mz_uint8*)pState->pRead_buf) + copied_to_caller; + } + else + { + /* Read directly into caller's buffer */ + if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pvBuf, copied_to_caller) != copied_to_caller) + { + /* Failed to read all that was asked for, flag failure and alert user */ + mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED); + pState->status = TINFL_STATUS_FAILED; + copied_to_caller = 0; + } + } + +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + /* Compute CRC if not returning compressed data only */ + if (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, (const mz_uint8 *)pvBuf, copied_to_caller); +#endif + + /* Advance offsets, dec counters */ + pState->cur_file_ofs += copied_to_caller; + pState->out_buf_ofs += copied_to_caller; + pState->comp_remaining -= copied_to_caller; + } + else + { + do + { + /* Calc ptr to write buffer - given current output pos and block size */ + mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pState->pWrite_buf + (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + + /* Calc max output size - given current output pos and block size */ + size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (pState->out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); + + if (!pState->out_blk_remain) + { + /* Read 
more data from file if none available (and reading from file) */ + if ((!pState->read_buf_avail) && (!pState->pZip->m_pState->m_pMem)) + { + /* Calc read size */ + pState->read_buf_avail = MZ_MIN(pState->read_buf_size, pState->comp_remaining); + if (pState->pZip->m_pRead(pState->pZip->m_pIO_opaque, pState->cur_file_ofs, pState->pRead_buf, (size_t)pState->read_buf_avail) != pState->read_buf_avail) + { + mz_zip_set_error(pState->pZip, MZ_ZIP_FILE_READ_FAILED); + pState->status = TINFL_STATUS_FAILED; + break; + } + + /* Advance offsets, dec counters */ + pState->cur_file_ofs += pState->read_buf_avail; + pState->comp_remaining -= pState->read_buf_avail; + pState->read_buf_ofs = 0; + } + + /* Perform decompression */ + in_buf_size = (size_t)pState->read_buf_avail; + pState->status = tinfl_decompress(&pState->inflator, (const mz_uint8 *)pState->pRead_buf + pState->read_buf_ofs, &in_buf_size, (mz_uint8 *)pState->pWrite_buf, pWrite_buf_cur, &out_buf_size, pState->comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); + pState->read_buf_avail -= in_buf_size; + pState->read_buf_ofs += in_buf_size; + + /* Update current output block size remaining */ + pState->out_blk_remain = out_buf_size; + } + + if (pState->out_blk_remain) + { + /* Calc amount to return. 
*/ + size_t to_copy = MZ_MIN( (buf_size - copied_to_caller), pState->out_blk_remain ); + + /* Copy data to caller's buffer */ + memcpy( (uint8_t*)pvBuf + copied_to_caller, pWrite_buf_cur, to_copy ); + +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + /* Perform CRC */ + pState->file_crc32 = (mz_uint32)mz_crc32(pState->file_crc32, pWrite_buf_cur, to_copy); +#endif + + /* Decrement data consumed from block */ + pState->out_blk_remain -= to_copy; + + /* Inc output offset, while performing sanity check */ + if ((pState->out_buf_ofs += to_copy) > pState->file_stat.m_uncomp_size) + { + mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED); + pState->status = TINFL_STATUS_FAILED; + break; + } + + /* Increment counter of data copied to caller */ + copied_to_caller += to_copy; + } + } while ( (copied_to_caller < buf_size) && ((pState->status == TINFL_STATUS_NEEDS_MORE_INPUT) || (pState->status == TINFL_STATUS_HAS_MORE_OUTPUT)) ); + } + + /* Return how many bytes were copied into user buffer */ + return copied_to_caller; +} + +mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState) +{ + int status; + + /* Argument sanity check */ + if ((!pState) || (!pState->pZip) || (!pState->pZip->m_pState)) + return MZ_FALSE; + + /* Was decompression completed and requested? */ + if ((pState->status == TINFL_STATUS_DONE) && (!(pState->flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + { + /* Make sure the entire file was decompressed, and check its CRC. 
*/ + if (pState->out_buf_ofs != pState->file_stat.m_uncomp_size) + { + mz_zip_set_error(pState->pZip, MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE); + pState->status = TINFL_STATUS_FAILED; + } +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + else if (pState->file_crc32 != pState->file_stat.m_crc32) + { + mz_zip_set_error(pState->pZip, MZ_ZIP_DECOMPRESSION_FAILED); + pState->status = TINFL_STATUS_FAILED; + } +#endif + } + + /* Free buffers */ + if (!pState->pZip->m_pState->m_pMem) + pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pRead_buf); + if (pState->pWrite_buf) + pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState->pWrite_buf); + + /* Save status */ + status = pState->status; + + /* Free context */ + pState->pZip->m_pFree(pState->pZip->m_pAlloc_opaque, pState); + + return status == TINFL_STATUS_DONE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n) +{ + (void)ofs; + + return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); +} + +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags) +{ + mz_bool status; + mz_zip_archive_file_stat file_stat; + MZ_FILE *pFile; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + if ((file_stat.m_is_directory) || (!file_stat.m_is_supported)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); + + pFile = MZ_FOPEN(pDst_filename, "wb"); + if (!pFile) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + + status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); + + if (MZ_FCLOSE(pFile) == EOF) + { + if (status) + mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); + + status = MZ_FALSE; + } + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_STDIO) + if (status) + mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); +#endif + + return status; +} + +mz_bool 
mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags) +{ + mz_uint32 file_index; + if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) + return MZ_FALSE; + + return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); +} + +mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *pFile, mz_uint flags) +{ + mz_zip_archive_file_stat file_stat; + + if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) + return MZ_FALSE; + + if ((file_stat.m_is_directory) || (!file_stat.m_is_supported)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); + + return mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); +} + +mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags) +{ + mz_uint32 file_index; + if (!mz_zip_reader_locate_file_v2(pZip, pArchive_filename, NULL, flags, &file_index)) + return MZ_FALSE; + + return mz_zip_reader_extract_to_cfile(pZip, file_index, pFile, flags); +} +#endif /* #ifndef MINIZ_NO_STDIO */ + +static size_t mz_zip_compute_crc32_callback(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_uint32 *p = (mz_uint32 *)pOpaque; + (void)file_ofs; + *p = (mz_uint32)mz_crc32(*p, (const mz_uint8 *)pBuf, n); + return n; +} + +mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags) +{ + mz_zip_archive_file_stat file_stat; + mz_zip_internal_state *pState; + const mz_uint8 *pCentral_dir_header; + mz_bool found_zip64_ext_data_in_cdir = MZ_FALSE; + mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint64 local_header_ofs = 0; + mz_uint32 local_header_filename_len, 
local_header_extra_len, local_header_crc32; + mz_uint64 local_header_comp_size, local_header_uncomp_size; + mz_uint32 uncomp_crc32 = MZ_CRC32_INIT; + mz_bool has_data_descriptor; + mz_uint32 local_header_bit_flags; + + mz_zip_array file_data_array; + mz_zip_array_init(&file_data_array, 1); + + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (file_index > pZip->m_total_files) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + pCentral_dir_header = mz_zip_get_cdh(pZip, file_index); + + if (!mz_zip_file_stat_internal(pZip, file_index, pCentral_dir_header, &file_stat, &found_zip64_ext_data_in_cdir)) + return MZ_FALSE; + + /* A directory or zero length file */ + if ((file_stat.m_is_directory) || (!file_stat.m_uncomp_size)) + return MZ_TRUE; + + /* Encryption and patch files are not supported. */ + if (file_stat.m_is_encrypted) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_ENCRYPTION); + + /* This function only supports stored and deflate. */ + if ((file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_METHOD); + + if (!file_stat.m_is_supported) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_FEATURE); + + /* Read and parse the local directory entry. 
*/ + local_header_ofs = file_stat.m_local_header_ofs; + if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + local_header_filename_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS); + local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS); + local_header_uncomp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS); + local_header_crc32 = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_CRC32_OFS); + local_header_bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + has_data_descriptor = (local_header_bit_flags & 8) != 0; + + if (local_header_filename_len != strlen(file_stat.m_filename)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if ((local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size) > pZip->m_archive_size) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if (!mz_zip_array_resize(pZip, &file_data_array, MZ_MAX(local_header_filename_len, local_header_extra_len), MZ_FALSE)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + if (local_header_filename_len) + { + if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE, file_data_array.m_p, local_header_filename_len) != local_header_filename_len) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + goto handle_failure; + } + + /* I've seen 1 archive that had the same pathname, but used backslashes in the local dir and forward slashes in the central dir. Do we care about this? For now, this case will fail validation. 
*/ + if (memcmp(file_stat.m_filename, file_data_array.m_p, local_header_filename_len) != 0) + { + mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); + goto handle_failure; + } + } + + if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX))) + { + mz_uint32 extra_size_remaining = local_header_extra_len; + const mz_uint8 *pExtra_data = (const mz_uint8 *)file_data_array.m_p; + + if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len, file_data_array.m_p, local_header_extra_len) != local_header_extra_len) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + goto handle_failure; + } + + do + { + mz_uint32 field_id, field_data_size, field_total_size; + + if (extra_size_remaining < (sizeof(mz_uint16) * 2)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + field_id = MZ_READ_LE16(pExtra_data); + field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); + field_total_size = field_data_size + sizeof(mz_uint16) * 2; + + if (field_total_size > extra_size_remaining) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) + { + const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32); + + if (field_data_size < sizeof(mz_uint64) * 2) + { + mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + goto handle_failure; + } + + local_header_uncomp_size = MZ_READ_LE64(pSrc_field_data); + local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); + + found_zip64_ext_data_in_ldir = MZ_TRUE; + break; + } + + pExtra_data += field_total_size; + extra_size_remaining -= field_total_size; + } while (extra_size_remaining); + } + + /* TODO: parse local header extra data when local_header_comp_size is 0xFFFFFFFF! 
(big_descriptor.zip) */ + /* I've seen zips in the wild with the data descriptor bit set, but proper local header values and bogus data descriptors */ + if ((has_data_descriptor) && (!local_header_comp_size) && (!local_header_crc32)) + { + mz_uint8 descriptor_buf[32]; + mz_bool has_id; + const mz_uint8 *pSrc; + mz_uint32 file_crc32; + mz_uint64 comp_size = 0, uncomp_size = 0; + + mz_uint32 num_descriptor_uint32s = ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) ? 6 : 4; + + if (pZip->m_pRead(pZip->m_pIO_opaque, local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_len + local_header_extra_len + file_stat.m_comp_size, descriptor_buf, sizeof(mz_uint32) * num_descriptor_uint32s) != (sizeof(mz_uint32) * num_descriptor_uint32s)) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + goto handle_failure; + } + + has_id = (MZ_READ_LE32(descriptor_buf) == MZ_ZIP_DATA_DESCRIPTOR_ID); + pSrc = has_id ? (descriptor_buf + sizeof(mz_uint32)) : descriptor_buf; + + file_crc32 = MZ_READ_LE32(pSrc); + + if ((pState->m_zip64) || (found_zip64_ext_data_in_ldir)) + { + comp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32)); + uncomp_size = MZ_READ_LE64(pSrc + sizeof(mz_uint32) + sizeof(mz_uint64)); + } + else + { + comp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32)); + uncomp_size = MZ_READ_LE32(pSrc + sizeof(mz_uint32) + sizeof(mz_uint32)); + } + + if ((file_crc32 != file_stat.m_crc32) || (comp_size != file_stat.m_comp_size) || (uncomp_size != file_stat.m_uncomp_size)) + { + mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); + goto handle_failure; + } + } + else + { + if ((local_header_crc32 != file_stat.m_crc32) || (local_header_comp_size != file_stat.m_comp_size) || (local_header_uncomp_size != file_stat.m_uncomp_size)) + { + mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); + goto handle_failure; + } + } + + mz_zip_array_clear(pZip, &file_data_array); + + if ((flags & MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY) == 0) + { + if (!mz_zip_reader_extract_to_callback(pZip, 
file_index, mz_zip_compute_crc32_callback, &uncomp_crc32, 0)) + return MZ_FALSE; + + /* 1 more check to be sure, although the extract checks too. */ + if (uncomp_crc32 != file_stat.m_crc32) + { + mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); + return MZ_FALSE; + } + } + + return MZ_TRUE; + +handle_failure: + mz_zip_array_clear(pZip, &file_data_array); + return MZ_FALSE; +} + +mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags) +{ + mz_zip_internal_state *pState; + uint32_t i; + + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + /* Basic sanity checks */ + if (!pState->m_zip64) + { + if (pZip->m_total_files > MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + if (pZip->m_archive_size > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + } + else + { + if (pZip->m_total_files >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + } + + for (i = 0; i < pZip->m_total_files; i++) + { + if (MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG & flags) + { + mz_uint32 found_index; + mz_zip_archive_file_stat stat; + + if (!mz_zip_reader_file_stat(pZip, i, &stat)) + return MZ_FALSE; + + if (!mz_zip_reader_locate_file_v2(pZip, stat.m_filename, NULL, 0, &found_index)) + return MZ_FALSE; + + /* This check can fail if there are duplicate filenames in the archive (which we don't check for when writing - that's up to the user) */ + if (found_index != i) + return mz_zip_set_error(pZip, MZ_ZIP_VALIDATION_FAILED); + } + + if (!mz_zip_validate_file(pZip, i, flags)) + return MZ_FALSE; + } + + return MZ_TRUE; +} + +mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr) +{ + mz_bool success = MZ_TRUE; + 
mz_zip_archive zip; + mz_zip_error actual_err = MZ_ZIP_NO_ERROR; + + if ((!pMem) || (!size)) + { + if (pErr) + *pErr = MZ_ZIP_INVALID_PARAMETER; + return MZ_FALSE; + } + + mz_zip_zero_struct(&zip); + + if (!mz_zip_reader_init_mem(&zip, pMem, size, flags)) + { + if (pErr) + *pErr = zip.m_last_error; + return MZ_FALSE; + } + + if (!mz_zip_validate_archive(&zip, flags)) + { + actual_err = zip.m_last_error; + success = MZ_FALSE; + } + + if (!mz_zip_reader_end_internal(&zip, success)) + { + if (!actual_err) + actual_err = zip.m_last_error; + success = MZ_FALSE; + } + + if (pErr) + *pErr = actual_err; + + return success; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr) +{ + mz_bool success = MZ_TRUE; + mz_zip_archive zip; + mz_zip_error actual_err = MZ_ZIP_NO_ERROR; + + if (!pFilename) + { + if (pErr) + *pErr = MZ_ZIP_INVALID_PARAMETER; + return MZ_FALSE; + } + + mz_zip_zero_struct(&zip); + + if (!mz_zip_reader_init_file_v2(&zip, pFilename, flags, 0, 0)) + { + if (pErr) + *pErr = zip.m_last_error; + return MZ_FALSE; + } + + if (!mz_zip_validate_archive(&zip, flags)) + { + actual_err = zip.m_last_error; + success = MZ_FALSE; + } + + if (!mz_zip_reader_end_internal(&zip, success)) + { + if (!actual_err) + actual_err = zip.m_last_error; + success = MZ_FALSE; + } + + if (pErr) + *pErr = actual_err; + + return success; +} +#endif /* #ifndef MINIZ_NO_STDIO */ + +/* ------------------- .ZIP archive writing */ + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +static MZ_FORCEINLINE void mz_write_le16(mz_uint8 *p, mz_uint16 v) +{ + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); +} +static MZ_FORCEINLINE void mz_write_le32(mz_uint8 *p, mz_uint32 v) +{ + p[0] = (mz_uint8)v; + p[1] = (mz_uint8)(v >> 8); + p[2] = (mz_uint8)(v >> 16); + p[3] = (mz_uint8)(v >> 24); +} +static MZ_FORCEINLINE void mz_write_le64(mz_uint8 *p, mz_uint64 v) +{ + mz_write_le32(p, (mz_uint32)v); + mz_write_le32(p + sizeof(mz_uint32), 
(mz_uint32)(v >> 32)); +} + +#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) +#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) +#define MZ_WRITE_LE64(p, v) mz_write_le64((mz_uint8 *)(p), (mz_uint64)(v)) + +static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); + + if (!n) + return 0; + + /* An allocation this big is likely to just fail on 32-bit systems, so don't even go there. */ + if ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); + return 0; + } + + if (new_size > pState->m_mem_capacity) + { + void *pNew_block; + size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); + + while (new_capacity < new_size) + new_capacity *= 2; + + if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) + { + mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + return 0; + } + + pState->m_pMem = pNew_block; + pState->m_mem_capacity = new_capacity; + } + memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); + pState->m_mem_size = (size_t)new_size; + return n; +} + +static mz_bool mz_zip_writer_end_internal(mz_zip_archive *pZip, mz_bool set_last_error) +{ + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + { + if (set_last_error) + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return MZ_FALSE; + } + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, 
&pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) + { + if (MZ_FCLOSE(pState->m_pFile) == EOF) + { + if (set_last_error) + mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); + status = MZ_FALSE; + } + } + + pState->m_pFile = NULL; + } +#endif /* #ifndef MINIZ_NO_STDIO */ + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags) +{ + mz_bool zip64 = (flags & MZ_ZIP_FLAG_WRITE_ZIP64) != 0; + + if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) + { + if (!pZip->m_pRead) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + } + + if (pZip->m_file_offset_alignment) + { + /* Ensure user specified file offset alignment is a power of 2. 
*/ + if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + } + + if (!pZip->m_pAlloc) + pZip->m_pAlloc = miniz_def_alloc_func; + if (!pZip->m_pFree) + pZip->m_pFree = miniz_def_free_func; + if (!pZip->m_pRealloc) + pZip->m_pRealloc = miniz_def_realloc_func; + + pZip->m_archive_size = existing_size; + pZip->m_central_directory_file_ofs = 0; + pZip->m_total_files = 0; + + if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); + + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); + MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); + + pZip->m_pState->m_zip64 = zip64; + pZip->m_pState->m_zip64_has_extended_info_fields = zip64; + + pZip->m_zip_type = MZ_ZIP_TYPE_USER; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) +{ + return mz_zip_writer_init_v2(pZip, existing_size, 0); +} + +mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags) +{ + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pNeeds_keepalive = NULL; + + if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) + pZip->m_pRead = mz_zip_mem_read_func; + + pZip->m_pIO_opaque = pZip; + + if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) + return MZ_FALSE; + + pZip->m_zip_type = MZ_ZIP_TYPE_HEAP; + + if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) + { + if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, 
initial_allocation_size))) + { + mz_zip_writer_end_internal(pZip, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + pZip->m_pState->m_mem_capacity = initial_allocation_size; + } + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size) +{ + return mz_zip_writer_init_heap_v2(pZip, size_to_reserve_at_beginning, initial_allocation_size, 0); +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + + file_ofs += pZip->m_pState->m_file_archive_start_ofs; + + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + { + mz_zip_set_error(pZip, MZ_ZIP_FILE_SEEK_FAILED); + return 0; + } + + return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning) +{ + return mz_zip_writer_init_file_v2(pZip, pFilename, size_to_reserve_at_beginning, 0); +} + +mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags) +{ + MZ_FILE *pFile; + + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pNeeds_keepalive = NULL; + + if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) + pZip->m_pRead = mz_zip_file_read_func; + + pZip->m_pIO_opaque = pZip; + + if (!mz_zip_writer_init_v2(pZip, size_to_reserve_at_beginning, flags)) + return MZ_FALSE; + + if (NULL == (pFile = MZ_FOPEN(pFilename, (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) ? 
"w+b" : "wb"))) + { + mz_zip_writer_end(pZip); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + } + + pZip->m_pState->m_pFile = pFile; + pZip->m_zip_type = MZ_ZIP_TYPE_FILE; + + if (size_to_reserve_at_beginning) + { + mz_uint64 cur_ofs = 0; + char buf[4096]; + + MZ_CLEAR_OBJ(buf); + + do + { + size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) + { + mz_zip_writer_end(pZip); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + cur_ofs += n; + size_to_reserve_at_beginning -= n; + } while (size_to_reserve_at_beginning); + } + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags) +{ + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pNeeds_keepalive = NULL; + + if (flags & MZ_ZIP_FLAG_WRITE_ALLOW_READING) + pZip->m_pRead = mz_zip_file_read_func; + + pZip->m_pIO_opaque = pZip; + + if (!mz_zip_writer_init_v2(pZip, 0, flags)) + return MZ_FALSE; + + pZip->m_pState->m_pFile = pFile; + pZip->m_pState->m_file_archive_start_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + pZip->m_zip_type = MZ_ZIP_TYPE_CFILE; + + return MZ_TRUE; +} +#endif /* #ifndef MINIZ_NO_STDIO */ + +mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags) +{ + mz_zip_internal_state *pState; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (flags & MZ_ZIP_FLAG_WRITE_ZIP64) + { + /* We don't support converting a non-zip64 file to zip64 - this seems like more trouble than it's worth. (What about the existing 32-bit data descriptors that could follow the compressed data?) 
*/ + if (!pZip->m_pState->m_zip64) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + } + + /* No sense in trying to write to an archive that's already at the support max size */ + if (pZip->m_pState->m_zip64) + { + if (pZip->m_total_files == MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + else + { + if (pZip->m_total_files == MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + + if ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); + } + + pState = pZip->m_pState; + + if (pState->m_pFile) + { +#ifdef MINIZ_NO_STDIO + (void)pFilename; + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); +#else + if (pZip->m_pIO_opaque != pZip) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (pZip->m_zip_type == MZ_ZIP_TYPE_FILE) + { + if (!pFilename) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + /* Archive is being read from stdio and was originally opened only for reading. Try to reopen as writable. */ + if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) + { + /* The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. */ + mz_zip_reader_end_internal(pZip, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + } + } + + pZip->m_pWrite = mz_zip_file_write_func; + pZip->m_pNeeds_keepalive = NULL; +#endif /* #ifdef MINIZ_NO_STDIO */ + } + else if (pState->m_pMem) + { + /* Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. 
*/ + if (pZip->m_pIO_opaque != pZip) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + pZip->m_pNeeds_keepalive = NULL; + } + /* Archive is being read via a user provided read function - make sure the user has specified a write function too. */ + else if (!pZip->m_pWrite) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + /* Start writing new files at the archive's current central directory location. */ + /* TODO: We could add a flag that lets the user start writing immediately AFTER the existing central dir - this would be safer. */ + pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_central_directory_file_ofs = 0; + + /* Clear the sorted central dir offsets, they aren't useful or maintained now. */ + /* Even though we're now in write mode, files can still be extracted and verified, but file locates will be slow. */ + /* TODO: We could easily maintain the sorted central directory offsets. */ + mz_zip_array_clear(pZip, &pZip->m_pState->m_sorted_central_dir_offsets); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename) +{ + return mz_zip_writer_init_from_reader_v2(pZip, pFilename, 0); +} + +/* TODO: pArchive_name is a terrible name here! 
*/ +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) +{ + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); +} + +typedef struct +{ + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, void *pUser) +{ + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) + return MZ_FALSE; + + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +#define MZ_ZIP64_MAX_LOCAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 2) +#define MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE (sizeof(mz_uint16) * 2 + sizeof(mz_uint64) * 3) +static mz_uint32 mz_zip_writer_create_zip64_extra_data(mz_uint8 *pBuf, mz_uint64 *pUncomp_size, mz_uint64 *pComp_size, mz_uint64 *pLocal_header_ofs) +{ + mz_uint8 *pDst = pBuf; + mz_uint32 field_size = 0; + + MZ_WRITE_LE16(pDst + 0, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID); + MZ_WRITE_LE16(pDst + 2, 0); + pDst += sizeof(mz_uint16) * 2; + + if (pUncomp_size) + { + MZ_WRITE_LE64(pDst, *pUncomp_size); + pDst += sizeof(mz_uint64); + field_size += sizeof(mz_uint64); + } + + if (pComp_size) + { + MZ_WRITE_LE64(pDst, *pComp_size); + pDst += sizeof(mz_uint64); + field_size += sizeof(mz_uint64); + } + + if (pLocal_header_ofs) + { + MZ_WRITE_LE64(pDst, *pLocal_header_ofs); + pDst += sizeof(mz_uint64); + field_size += sizeof(mz_uint64); + } + + MZ_WRITE_LE16(pBuf + 2, field_size); + + return (mz_uint32)(pDst - pBuf); +} + +static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 
uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, + mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, + mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, + mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, + mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_MIN(comp_size, MZ_UINT32_MAX)); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_MIN(uncomp_size, MZ_UINT32_MAX)); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_MIN(local_header_ofs, MZ_UINT32_MAX)); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, + const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, + mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, + mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, + mz_uint64 local_header_ofs, mz_uint32 ext_attributes, + const char *user_extra_data, mz_uint user_extra_data_len) +{ + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + if (!pZip->m_pState->m_zip64) + { + if (local_header_ofs > 0xFFFFFFFF) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_TOO_LARGE); + } + + /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ + if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + user_extra_data_len + comment_size) >= MZ_UINT32_MAX) + return 
mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + + if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, (mz_uint16)(extra_size + user_extra_data_len), comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, user_extra_data, user_extra_data_len)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) + { + /* Try to resize the central directory array back into its original state. */ + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) +{ + /* Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. 
*/ + if (*pArchive_name == '/') + return MZ_FALSE; + + /* Making sure the name does not contain drive letters or DOS style backward slashes is the responsibility of the program using miniz*/ + + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) +{ + mz_uint32 n; + if (!pZip->m_file_offset_alignment) + return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (mz_uint)((pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1)); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) +{ + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) + { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_file_ofs += s; + n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) +{ + return mz_zip_writer_add_mem_ex_v2(pZip, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, uncomp_size, uncomp_crc32, NULL, NULL, 0, NULL, 0); +} + +mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, + mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, + const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) +{ + mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = 
pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + mz_uint8 *pExtra_data = NULL; + mz_uint32 extra_size = 0; + mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; + mz_uint16 bit_flags = 0; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + + if (uncomp_size || (buf_size && !(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) + bit_flags |= MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; + + if (!(level_and_flags & MZ_ZIP_FLAG_ASCII_FILENAME)) + bit_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; + + level = level_and_flags & 0xF; + store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + if (pState->m_zip64) + { + if (pZip->m_total_files == MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + else + { + if (pZip->m_total_files == MZ_UINT16_MAX) + { + pState->m_zip64 = MZ_TRUE; + /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ + } + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) + { + pState->m_zip64 = MZ_TRUE; + /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ + } + } + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); + +#ifndef MINIZ_NO_TIME + if (last_modified != NULL) + { + mz_zip_time_t_to_dos_time(*last_modified, &dos_time, &dos_date); + } + else + { + MZ_TIME_T cur_time; + time(&cur_time); + 
mz_zip_time_t_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif /* #ifndef MINIZ_NO_TIME */ + + /* raw input: derive the CRC-32 and uncompressed size from the buffer; payloads of 3 bytes or fewer are stored instead of deflated */ if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) + { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + /* the name length is later cast to mz_uint16 when the headers are built, so it must fit in 16 bits */ archive_name_size = strlen(pArchive_name); + if (archive_name_size > MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ + if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + + if (!pState->m_zip64) + { + /* Bail early if the archive would obviously become too large */ + if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size + + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + user_extra_data_len + + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + user_extra_data_central_len + + MZ_ZIP_DATA_DESCRIPTER_SIZE32) > 0xFFFFFFFF) + { + /* despite the comment above, the code no longer bails here: it upgrades the archive to zip64 and the error return is commented out */ pState->m_zip64 = MZ_TRUE; + /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ + } + } + + /* a name ending in a forward slash is treated as a directory entry */ if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) + { + /* Set DOS Subdirectory attribute bit. */ + ext_attributes |= MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG; + + /* Subdirectories cannot contain data. */ + if ((buf_size) || (uncomp_size)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + } + + /* Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.)
*/ + if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size + (pState->m_zip64 ? MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE : 0))) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + /* the compressor state is allocated only when the payload will actually be deflated; pComp stays NULL otherwise */ if ((!store_data_uncompressed) && (buf_size)) + { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + /* first write to the archive: zero padding so that the local header starts at an aligned offset */ if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) + { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); + } + cur_archive_file_ofs += num_alignment_padding_bytes; + + MZ_CLEAR_OBJ(local_dir_header); + + /* pre-compressed input is recorded as deflated too; everything else keeps method 0 */ if (!store_data_uncompressed || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + method = MZ_DEFLATED; + } + + if (pState->m_zip64) + { + /* a zip64 extra field is built only when the uncompressed size or the header offset reaches the 32-bit limit */ if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) + { + pExtra_data = extra_data; + extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, + (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ?
&local_dir_header_ofs : NULL); + } + + /* zip64 path: local header, then name, then the optional zip64 extra field. The three literal 0 arguments are presumably the CRC and size fields - confirm against mz_zip_writer_create_local_dir_header. NOTE(review): this return and the header-write return below do not free pComp, unlike the name-write failure path - possible leak when compressing, confirm */ if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, bit_flags, dos_time, dos_date)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += sizeof(local_dir_header); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + cur_archive_file_ofs += archive_name_size; + + if (pExtra_data != NULL) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += extra_size; + } + } + else + { + /* non-zip64 path: the current offset must still fit in 32 bits; only the header and the name are written here. NOTE(review): the returns before the name write do not free pComp either */ if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, bit_flags, dos_time, dos_date)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += sizeof(local_dir_header); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + cur_archive_file_ofs += archive_name_size; + } + + if
(user_extra_data_len > 0) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, user_extra_data_len) != user_extra_data_len) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += user_extra_data_len; + } + + /* payload: either copied verbatim (level 0, tiny or pre-compressed input) or deflated with a single tdefl_compress_buffer call */ if (store_data_uncompressed) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + } + else if (buf_size) + { + mz_zip_writer_add_state state; + + /* state is handed to mz_zip_writer_add_put_buf_callback; its offset and compressed size are read back after compression */ state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + /* trailing data descriptor: signature, CRC-32, then compressed and uncompressed sizes as 32-bit fields, or as 64-bit fields when a zip64 extra field was emitted */ if (uncomp_size) + { + mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; + mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; + + MZ_ASSERT(bit_flags & MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR); + + MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); + MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); + if (pExtra_data == NULL) + { + if (comp_size > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + MZ_WRITE_LE32(local_dir_footer + 8, comp_size); + MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); + } + else + { + MZ_WRITE_LE64(local_dir_footer + 8, comp_size); + MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); + local_dir_footer_size =
MZ_ZIP_DATA_DESCRIPTER_SIZE64; + } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size) + return MZ_FALSE; + + cur_archive_file_ofs += local_dir_footer_size; + } + + /* rebuild the zip64 extra field now that comp_size is known; this copy goes into the central directory record */ if (pExtra_data != NULL) + { + extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, + (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); + } + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, + comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, + user_extra_data_central, user_extra_data_central_len)) + return MZ_FALSE; + + /* commit: the file count and archive size are updated only after every write above succeeded */ pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +/* Adds one entry named pArchive_name whose size_to_add bytes of content are pulled through read_callback(callback_opaque, offset, buffer, n) in chunks of at most MZ_ZIP_MAX_IO_BUF_SIZE bytes. The data is copied as-is when the level in the low 4 bits of level_and_flags is 0 or the size is 3 bytes or fewer, and deflated otherwise; a data descriptor always follows the payload. pFile_time, when non-NULL, supplies the timestamp (DOS date and time stay 0 otherwise). MZ_ZIP_FLAG_COMPRESSED_DATA is rejected with MZ_ZIP_INVALID_PARAMETER. Returns MZ_TRUE on success. */ mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) +{ + mz_uint16 gen_flags = MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR; + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + /* NOTE(review): pZip is dereferenced in this initializer, ahead of the (!pZip) sanity check below */ mz_uint64 local_dir_header_ofs, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = size_to_add, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + mz_uint8 *pExtra_data = NULL; + mz_uint32 extra_size = 0; + mz_uint8 extra_data[MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE]; + mz_zip_internal_state *pState; + mz_uint64 file_ofs = 0; + + if (!(level_and_flags
& MZ_ZIP_FLAG_ASCII_FILENAME)) + gen_flags |= MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8; + + /* a negative value selects MZ_DEFAULT_LEVEL; the low 4 bits are the compression level */ if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + /* Sanity checks */ + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + /* a source larger than MZ_UINT32_MAX bytes switches the archive to zip64 instead of failing */ if ((!pState->m_zip64) && (uncomp_size > MZ_UINT32_MAX)) + { + /* Source file is too large for non-zip64 */ + /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ + pState->m_zip64 = MZ_TRUE; + } + + /* We could support this, but why? */ + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); + + /* entry-count limits: hard error at MZ_UINT32_MAX entries in zip64 mode, automatic zip64 upgrade at MZ_UINT16_MAX entries otherwise */ if (pState->m_zip64) + { + if (pZip->m_total_files == MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + else + { + if (pZip->m_total_files == MZ_UINT16_MAX) + { + pState->m_zip64 = MZ_TRUE; + /*return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); */ + } + } + + /* the name length is later cast to mz_uint16 when the headers are built, so it must fit in 16 bits */ archive_name_size = strlen(pArchive_name); + if (archive_name_size > MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_FILENAME); + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + /* miniz doesn't support central dirs >= MZ_UINT32_MAX bytes yet */ + if (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + MZ_ZIP64_MAX_CENTRAL_EXTRA_FIELD_SIZE + comment_size) >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + + if (!pState->m_zip64) + { + /* Bail early if the archive would obviously become too large */ + if ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + archive_name_size
+ MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + + archive_name_size + comment_size + user_extra_data_len + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 1024 + + MZ_ZIP_DATA_DESCRIPTER_SIZE32 + user_extra_data_central_len) > 0xFFFFFFFF) + { + /* the estimate includes 1024 bytes of slack; exceeding 32 bits upgrades the archive to zip64 (the error return is commented out) */ pState->m_zip64 = MZ_TRUE; + /*return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); */ + } + } + +#ifndef MINIZ_NO_TIME + /* without pFile_time the DOS date and time keep their initial value of 0 */ if (pFile_time) + { + mz_zip_time_t_to_dos_time(*pFile_time, &dos_time, &dos_date); + } +#endif + + /* inputs of 3 bytes or fewer are copied as-is, not deflated */ if (uncomp_size <= 3) + level = 0; + + /* first write to the archive: zero padding up to the configured file alignment */ if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes)) + { + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + + cur_archive_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_archive_file_ofs; + + if (pZip->m_file_offset_alignment) + { + MZ_ASSERT((cur_archive_file_ofs & (pZip->m_file_offset_alignment - 1)) == 0); + } + + /* empty inputs keep method 0 even when a compression level was requested */ if (uncomp_size && level) + { + method = MZ_DEFLATED; + } + + MZ_CLEAR_OBJ(local_dir_header); + if (pState->m_zip64) + { + /* a zip64 extra field is built only when the uncompressed size or the header offset reaches the 32-bit limit */ if (uncomp_size >= MZ_UINT32_MAX || local_dir_header_ofs >= MZ_UINT32_MAX) + { + pExtra_data = extra_data; + extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, + (uncomp_size >= MZ_UINT32_MAX) ? &comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ?
&local_dir_header_ofs : NULL); + } + + /* zip64 path: local header, then name, then the zip64 extra field */ if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)(extra_size + user_extra_data_len), 0, 0, 0, method, gen_flags, dos_time, dos_date)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += sizeof(local_dir_header); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + + cur_archive_file_ofs += archive_name_size; + + /* issued unconditionally: extra_size is still 0 when no zip64 extra field was built, which makes this a zero-length write */ if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, extra_data, extra_size) != extra_size) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += extra_size; + } + else + { + /* non-zip64 path: the current offset must still fit in 32 bits; only the header and the name are written here */ if ((comp_size > MZ_UINT32_MAX) || (cur_archive_file_ofs > MZ_UINT32_MAX)) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, (mz_uint16)user_extra_data_len, 0, 0, 0, method, gen_flags, dos_time, dos_date)) + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += sizeof(local_dir_header); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + + cur_archive_file_ofs += archive_name_size; + } + + /* caller-supplied extra data follows the name (and the zip64 field, if any) */ if (user_extra_data_len > 0) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, user_extra_data, user_extra_data_len) !=
user_extra_data_len) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_archive_file_ofs += user_extra_data_len; + } + + if (uncomp_size) + { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) + { + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + /* level 0: copy the source straight into the archive chunk by chunk while accumulating the CRC-32 */ if (!level) + { + while (uncomp_remaining) + { + mz_uint n = (mz_uint)MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + /* NOTE(review): a failed archive write is also reported as MZ_ZIP_FILE_READ_FAILED by this combined check */ if ((read_callback(callback_opaque, file_ofs, pRead_buf, n) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + file_ofs += n; + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } + else + { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + /* state is handed to mz_zip_writer_add_put_buf_callback; its offset and compressed size are read back once compression finishes */ state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + return mz_zip_set_error(pZip, MZ_ZIP_INTERNAL_ERROR); + } + + /* deflate loop: read a chunk, fold it into the CRC-32, feed it to the compressor; failures record an error and break out with result still MZ_FALSE */ for (;;) + { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, (mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + tdefl_flush flush = TDEFL_NO_FLUSH; + + if (read_callback(callback_opaque, file_ofs, pRead_buf, in_buf_size)!= in_buf_size) + { + mz_zip_set_error(pZip,
MZ_ZIP_FILE_READ_FAILED); + break; + } + + file_ofs += in_buf_size; + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + /* when the optional keepalive hook is set and returns non-zero, this chunk is compressed with a full flush */ if (pZip->m_pNeeds_keepalive != NULL && pZip->m_pNeeds_keepalive(pZip->m_pIO_opaque)) + flush = TDEFL_FULL_FLUSH; + + /* the final chunk (nothing remaining) is submitted with TDEFL_FINISH, which is what yields TDEFL_STATUS_DONE */ status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? flush : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) + { + result = MZ_TRUE; + break; + } + else if (status != TDEFL_STATUS_OKAY) + { + mz_zip_set_error(pZip, MZ_ZIP_COMPRESSION_FAILED); + break; + } + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + /* data descriptor, written for every entry added through this function: signature, CRC-32, then 32-bit sizes, or 64-bit sizes when a zip64 extra field was emitted */ { + mz_uint8 local_dir_footer[MZ_ZIP_DATA_DESCRIPTER_SIZE64]; + mz_uint32 local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE32; + + MZ_WRITE_LE32(local_dir_footer + 0, MZ_ZIP_DATA_DESCRIPTOR_ID); + MZ_WRITE_LE32(local_dir_footer + 4, uncomp_crc32); + if (pExtra_data == NULL) + { + if (comp_size > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + MZ_WRITE_LE32(local_dir_footer + 8, comp_size); + MZ_WRITE_LE32(local_dir_footer + 12, uncomp_size); + } + else + { + MZ_WRITE_LE64(local_dir_footer + 8, comp_size); + MZ_WRITE_LE64(local_dir_footer + 16, uncomp_size); + local_dir_footer_size = MZ_ZIP_DATA_DESCRIPTER_SIZE64; + } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, local_dir_footer, local_dir_footer_size) != local_dir_footer_size) + return MZ_FALSE; + + cur_archive_file_ofs += local_dir_footer_size; + } + + /* rebuild the zip64 extra field now that comp_size is known; this copy goes into the central directory record */ if (pExtra_data != NULL) + { + extra_size = mz_zip_writer_create_zip64_extra_data(extra_data, (uncomp_size >= MZ_UINT32_MAX) ? &uncomp_size : NULL, + (uncomp_size >= MZ_UINT32_MAX) ?
&comp_size : NULL, (local_dir_header_ofs >= MZ_UINT32_MAX) ? &local_dir_header_ofs : NULL); + } + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, pExtra_data, (mz_uint16)extra_size, pComment, comment_size, + uncomp_size, comp_size, uncomp_crc32, method, gen_flags, dos_time, dos_date, local_dir_header_ofs, ext_attributes, + user_extra_data_central, user_extra_data_central_len)) + return MZ_FALSE; + + /* commit: the file count and archive size are updated only after every write above succeeded */ pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO + +/* Read callback over a stdio stream: pOpaque is the MZ_FILE to read from. Seeks to file_ofs only when the stream is not already positioned there, then reads up to n bytes into pBuf. Returns the count reported by MZ_FREAD, or 0 when file_ofs is negative as a signed 64-bit value or the seek returns non-zero. */ static size_t mz_file_read_func_stdio(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + MZ_FILE *pSrc_file = (MZ_FILE *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pSrc_file); + + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pSrc_file, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + + return MZ_FREAD(pBuf, 1, n, pSrc_file); +} + +/* Adds size_to_add bytes read from the already-open stream pSrc_file as entry pArchive_name by delegating to mz_zip_writer_add_read_buf_callback with mz_file_read_func_stdio as the reader. The stream is not closed here. */ mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + const char *user_extra_data, mz_uint user_extra_data_len, const char *user_extra_data_central, mz_uint user_extra_data_central_len) +{ + return mz_zip_writer_add_read_buf_callback(pZip, pArchive_name, mz_file_read_func_stdio, pSrc_file, size_to_add, pFile_time, pComment, comment_size, level_and_flags, + user_extra_data, user_extra_data_len, user_extra_data_central, user_extra_data_central_len); +} + +/* Adds the file at pSrc_filename to the archive as pArchive_name: opens it for binary reading, measures its size by seeking to the end, hands the stream to mz_zip_writer_add_cfile (with no user extra data) and closes it. Unless MINIZ_NO_TIME or MINIZ_NO_STDIO is defined, the modification time of the file is looked up first and recorded. Returns the status of mz_zip_writer_add_cfile, or the value of mz_zip_set_error when the time lookup or the open fails. */ mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + MZ_FILE *pSrc_file = NULL; + mz_uint64 uncomp_size = 0; + MZ_TIME_T file_modified_time; + MZ_TIME_T *pFile_time = NULL; + mz_bool status; + + memset(&file_modified_time, 0, sizeof(file_modified_time)); + +#if !defined(MINIZ_NO_TIME) &&
!defined(MINIZ_NO_STDIO) + pFile_time = &file_modified_time; + if (!mz_zip_get_file_modified_time(pSrc_filename, &file_modified_time)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_STAT_FAILED); +#endif + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_OPEN_FAILED); + + /* size the file by seeking to its end, then rewind. NOTE(review): the results of MZ_FSEEK64 and MZ_FTELL64 are not checked, so a failure here would be passed on as the size */ MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + status = mz_zip_writer_add_cfile(pZip, pArchive_name, pSrc_file, uncomp_size, pFile_time, pComment, comment_size, level_and_flags, NULL, 0, NULL, 0); + + /* the stream is closed whether or not the add succeeded */ MZ_FCLOSE(pSrc_file); + + return status; +} +#endif /* #ifndef MINIZ_NO_STDIO */ + +/* Rebuilds an extra-field block into pNew_ext, which is first reset to size 0. If any of pUncomp_size, pComp_size, pLocal_header_ofs or pDisk_start is non-NULL, a new zip64 extended-information field is emitted first, holding the pointed-to values in the order uncompressed size, compressed size, local header offset, disk start. Then every field of the existing block pExt (ext_len bytes) is appended except any previous zip64 extended-information field. Returns MZ_TRUE on success; a malformed source block reports MZ_ZIP_INVALID_HEADER_OR_CORRUPTED and an allocation failure MZ_ZIP_ALLOC_FAILED through mz_zip_set_error. */ static mz_bool mz_zip_writer_update_zip64_extension_block(mz_zip_array *pNew_ext, mz_zip_archive *pZip, const mz_uint8 *pExt, uint32_t ext_len, mz_uint64 *pComp_size, mz_uint64 *pUncomp_size, mz_uint64 *pLocal_header_ofs, mz_uint32 *pDisk_start) +{ + /* + 64 should be enough for any new zip64 data */ + if (!mz_zip_array_reserve(pZip, pNew_ext, ext_len + 64, MZ_FALSE)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + mz_zip_array_resize(pZip, pNew_ext, 0, MZ_FALSE); + + if ((pUncomp_size) || (pComp_size) || (pLocal_header_ofs) || (pDisk_start)) + { + mz_uint8 new_ext_block[64]; + mz_uint8 *pDst = new_ext_block; + mz_write_le16(pDst, MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID); + /* size placeholder, patched below once the payload length is known */ mz_write_le16(pDst + sizeof(mz_uint16), 0); + pDst += sizeof(mz_uint16) * 2; + + if (pUncomp_size) + { + mz_write_le64(pDst, *pUncomp_size); + pDst += sizeof(mz_uint64); + } + + if (pComp_size) + { + mz_write_le64(pDst, *pComp_size); + pDst += sizeof(mz_uint64); + } + + if (pLocal_header_ofs) + { + mz_write_le64(pDst, *pLocal_header_ofs); + pDst += sizeof(mz_uint64); + } + + if (pDisk_start) + { + mz_write_le32(pDst, *pDisk_start); + pDst += sizeof(mz_uint32); + } + + /* patch the field size: bytes written after the 4-byte id and size header */ mz_write_le16(new_ext_block + sizeof(mz_uint16), (mz_uint16)((pDst - new_ext_block) - sizeof(mz_uint16) * 2)); + + if
(!mz_zip_array_push_back(pZip, pNew_ext, new_ext_block, pDst - new_ext_block)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + /* walk the old extra block as a sequence of records (16-bit id, 16-bit data size, data) and copy over everything except stale zip64 records */ if ((pExt) && (ext_len)) + { + mz_uint32 extra_size_remaining = ext_len; + const mz_uint8 *pExtra_data = pExt; + + do + { + mz_uint32 field_id, field_data_size, field_total_size; + + /* fewer than 4 bytes left cannot hold a record header */ if (extra_size_remaining < (sizeof(mz_uint16) * 2)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + field_id = MZ_READ_LE16(pExtra_data); + field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); + field_total_size = field_data_size + sizeof(mz_uint16) * 2; + + /* a record may not extend past the end of the block */ if (field_total_size > extra_size_remaining) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + if (field_id != MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) + { + if (!mz_zip_array_push_back(pZip, pNew_ext, pExtra_data, field_total_size)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + pExtra_data += field_total_size; + extra_size_remaining -= field_total_size; + } while (extra_size_remaining); + } + + return MZ_TRUE; +} + +/* TODO: This func is now pretty freakin complex due to zip64, split it up?
*/ +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index) +{ + mz_uint n, bit_flags, num_alignment_padding_bytes, src_central_dir_following_data_size; + mz_uint64 src_archive_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; + mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 new_central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; + const mz_uint8 *pSrc_central_header; + mz_zip_archive_file_stat src_file_stat; + mz_uint32 src_filename_len, src_comment_len, src_ext_len; + mz_uint32 local_header_filename_size, local_header_extra_len; + mz_uint64 local_header_comp_size, local_header_uncomp_size; + mz_bool found_zip64_ext_data_in_ldir = MZ_FALSE; + + /* Sanity checks */ + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pSource_zip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + /* Don't support copying files from zip64 archives to non-zip64, even though in some cases this is possible */ + if ((pSource_zip->m_pState->m_zip64) && (!pZip->m_pState->m_zip64)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + /* Get pointer to the source central dir header and crack it */ + if (NULL == (pSrc_central_header = mz_zip_get_cdh(pSource_zip, src_file_index))) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_SIG_OFS) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + src_filename_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS); + src_comment_len = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + src_ext_len = 
MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS); + src_central_dir_following_data_size = src_filename_len + src_ext_len + src_comment_len; + + /* TODO: We don't support central dir's >= MZ_UINT32_MAX bytes right now (+32 fudge factor in case we need to add more extra data) */ + if ((pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + 32) >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + if (!pState->m_zip64) + { + if (pZip->m_total_files == MZ_UINT16_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + else + { + /* TODO: Our zip64 support still has some 32-bit limits that may not be worth fixing. */ + if (pZip->m_total_files == MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + + if (!mz_zip_file_stat_internal(pSource_zip, src_file_index, pSrc_central_header, &src_file_stat, NULL)) + return MZ_FALSE; + + cur_src_file_ofs = src_file_stat.m_local_header_ofs; + cur_dst_file_ofs = pZip->m_archive_size; + + /* Read the source archive's local dir header */ + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + /* Compute the total size we need to copy (filename+extra data+compressed data) */ + local_header_filename_size = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS); + local_header_extra_len = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + local_header_comp_size = MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS); + local_header_uncomp_size = 
MZ_READ_LE32(pLocal_header + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS); + src_archive_bytes_remaining = local_header_filename_size + local_header_extra_len + src_file_stat.m_comp_size; + + /* Try to find a zip64 extended information field */ + if ((local_header_extra_len) && ((local_header_comp_size == MZ_UINT32_MAX) || (local_header_uncomp_size == MZ_UINT32_MAX))) + { + mz_zip_array file_data_array; + const mz_uint8 *pExtra_data; + mz_uint32 extra_size_remaining = local_header_extra_len; + + mz_zip_array_init(&file_data_array, 1); + if (!mz_zip_array_resize(pZip, &file_data_array, local_header_extra_len, MZ_FALSE)) + { + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, src_file_stat.m_local_header_ofs + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + local_header_filename_size, file_data_array.m_p, local_header_extra_len) != local_header_extra_len) + { + mz_zip_array_clear(pZip, &file_data_array); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + + pExtra_data = (const mz_uint8 *)file_data_array.m_p; + + do + { + mz_uint32 field_id, field_data_size, field_total_size; + + if (extra_size_remaining < (sizeof(mz_uint16) * 2)) + { + mz_zip_array_clear(pZip, &file_data_array); + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + field_id = MZ_READ_LE16(pExtra_data); + field_data_size = MZ_READ_LE16(pExtra_data + sizeof(mz_uint16)); + field_total_size = field_data_size + sizeof(mz_uint16) * 2; + + if (field_total_size > extra_size_remaining) + { + mz_zip_array_clear(pZip, &file_data_array); + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + if (field_id == MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID) + { + const mz_uint8 *pSrc_field_data = pExtra_data + sizeof(mz_uint32); + + if (field_data_size < sizeof(mz_uint64) * 2) + { + mz_zip_array_clear(pZip, &file_data_array); + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_HEADER_OR_CORRUPTED); + } + + local_header_uncomp_size 
= MZ_READ_LE64(pSrc_field_data); + local_header_comp_size = MZ_READ_LE64(pSrc_field_data + sizeof(mz_uint64)); /* may be 0 if there's a descriptor */ + + found_zip64_ext_data_in_ldir = MZ_TRUE; + break; + } + + pExtra_data += field_total_size; + extra_size_remaining -= field_total_size; + } while (extra_size_remaining); + + mz_zip_array_clear(pZip, &file_data_array); + } + + if (!pState->m_zip64) + { + /* Try to detect if the new archive will most likely wind up too big and bail early (+(sizeof(mz_uint32) * 4) is for the optional descriptor which could be present, +64 is a fudge factor). */ + /* We also check when the archive is finalized so this doesn't need to be perfect. */ + mz_uint64 approx_new_archive_size = cur_dst_file_ofs + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + src_archive_bytes_remaining + (sizeof(mz_uint32) * 4) + + pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_central_dir_following_data_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE + 64; + + if (approx_new_archive_size >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + } + + /* Write dest archive padding */ + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) + return MZ_FALSE; + + cur_dst_file_ofs += num_alignment_padding_bytes; + + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) + { + MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); + } + + /* The original zip's local header+ext block doesn't change, even with zip64, so we can just copy it over to the dest zip */ + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + /* Copy over the source archive bytes to the dest archive, also ensure we have enough buf space to handle optional data 
descriptor */ + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(32U, MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining))))) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + while (src_archive_bytes_remaining) + { + n = (mz_uint)MZ_MIN((mz_uint64)MZ_ZIP_MAX_IO_BUF_SIZE, src_archive_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + cur_dst_file_ofs += n; + + src_archive_bytes_remaining -= n; + } + + /* Now deal with the optional data descriptor */ + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) + { + /* Copy data descriptor */ + if ((pSource_zip->m_pState->m_zip64) || (found_zip64_ext_data_in_ldir)) + { + /* src is zip64, dest must be zip64 */ + + /* name uint32_t's */ + /* id 1 (optional in zip64?) */ + /* crc 1 */ + /* comp_size 2 */ + /* uncomp_size 2 */ + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, (sizeof(mz_uint32) * 6)) != (sizeof(mz_uint32) * 6)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID) ? 
6 : 5); + } + else + { + /* src is NOT zip64 */ + mz_bool has_id; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED); + } + + has_id = (MZ_READ_LE32(pBuf) == MZ_ZIP_DATA_DESCRIPTOR_ID); + + if (pZip->m_pState->m_zip64) + { + /* dest is zip64, so upgrade the data descriptor */ + const mz_uint32 *pSrc_descriptor = (const mz_uint32 *)((const mz_uint8 *)pBuf + (has_id ? sizeof(mz_uint32) : 0)); + const mz_uint32 src_crc32 = pSrc_descriptor[0]; + const mz_uint64 src_comp_size = pSrc_descriptor[1]; + const mz_uint64 src_uncomp_size = pSrc_descriptor[2]; + + mz_write_le32((mz_uint8 *)pBuf, MZ_ZIP_DATA_DESCRIPTOR_ID); + mz_write_le32((mz_uint8 *)pBuf + sizeof(mz_uint32) * 1, src_crc32); + mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 2, src_comp_size); + mz_write_le64((mz_uint8 *)pBuf + sizeof(mz_uint32) * 4, src_uncomp_size); + + n = sizeof(mz_uint32) * 6; + } + else + { + /* dest is NOT zip64, just copy it as-is */ + n = sizeof(mz_uint32) * (has_id ? 4 : 3); + } + } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + /* Finally, add the new central dir header */ + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(new_central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + + if (pState->m_zip64) + { + /* This is the painful part: We need to write a new central dir header + ext block with updated zip64 fields, and ensure the old fields (if any) are not included. 
*/ + const mz_uint8 *pSrc_ext = pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len; + mz_zip_array new_ext_block; + + mz_zip_array_init(&new_ext_block, sizeof(mz_uint8)); + + MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, MZ_UINT32_MAX); + MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, MZ_UINT32_MAX); + MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, MZ_UINT32_MAX); + + if (!mz_zip_writer_update_zip64_extension_block(&new_ext_block, pZip, pSrc_ext, src_ext_len, &src_file_stat.m_comp_size, &src_file_stat.m_uncomp_size, &local_dir_header_ofs, NULL)) + { + mz_zip_array_clear(pZip, &new_ext_block); + return MZ_FALSE; + } + + MZ_WRITE_LE16(new_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS, new_ext_block.m_size); + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + { + mz_zip_array_clear(pZip, &new_ext_block); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_filename_len)) + { + mz_zip_array_clear(pZip, &new_ext_block); + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_ext_block.m_p, new_ext_block.m_size)) + { + mz_zip_array_clear(pZip, &new_ext_block); + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + src_filename_len + src_ext_len, src_comment_len)) + { + mz_zip_array_clear(pZip, &new_ext_block); + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + 
mz_zip_array_clear(pZip, &new_ext_block); + } + else + { + /* sanity checks */ + if (cur_dst_file_ofs > MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + if (local_dir_header_ofs >= MZ_UINT32_MAX) + return mz_zip_set_error(pZip, MZ_ZIP_ARCHIVE_TOO_LARGE); + + MZ_WRITE_LE32(new_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, new_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, src_central_dir_following_data_size)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + } + + /* This shouldn't trigger unless we screwed up during the initial sanity checks */ + if (pState->m_central_dir.m_size >= MZ_UINT32_MAX) + { + /* TODO: Support central dirs >= 32-bits in size */ + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_UNSUPPORTED_CDIR_SIZE); + } + + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED); + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[256]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + pState = pZip->m_pState; + + if (pState->m_zip64) + { + if ((pZip->m_total_files > MZ_UINT32_MAX) || 
(pState->m_central_dir.m_size >= MZ_UINT32_MAX)) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + else + { + if ((pZip->m_total_files > MZ_UINT16_MAX) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > MZ_UINT32_MAX)) + return mz_zip_set_error(pZip, MZ_ZIP_TOO_MANY_FILES); + } + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) + { + /* Write central directory */ + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + pZip->m_archive_size += central_dir_size; + } + + if (pState->m_zip64) + { + /* Write zip64 end of central directory header */ + mz_uint64 rel_ofs_to_zip64_ecdr = pZip->m_archive_size; + + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDH_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE - sizeof(mz_uint32) - sizeof(mz_uint64)); + MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS, 0x031E); /* TODO: always Unix */ + MZ_WRITE_LE16(hdr + MZ_ZIP64_ECDH_VERSION_NEEDED_OFS, 0x002D); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDH_CDIR_OFS_OFS, central_dir_ofs); + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE; + + /* Write zip64 end of 
central directory locator */ + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_SIG_OFS, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG); + MZ_WRITE_LE64(hdr + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS, rel_ofs_to_zip64_ecdr); + MZ_WRITE_LE32(hdr + MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS, 1); + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) != MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + + pZip->m_archive_size += MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE; + } + + /* Write end of central directory record */ + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files)); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, MZ_MIN(MZ_UINT16_MAX, pZip->m_total_files)); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_size)); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, MZ_MIN(MZ_UINT32_MAX, central_dir_ofs)); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_WRITE_FAILED); + +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) + return mz_zip_set_error(pZip, MZ_ZIP_FILE_CLOSE_FAILED); +#endif /* #ifndef MINIZ_NO_STDIO */ + + pZip->m_archive_size += MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE; + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize) +{ + if ((!ppBuf) || (!pSize)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + *ppBuf = NULL; + *pSize = 0; + + if ((!pZip) || (!pZip->m_pState)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if 
(pZip->m_pWrite != mz_zip_heap_write_func) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + if (!mz_zip_writer_finalize_archive(pZip)) + return MZ_FALSE; + + *ppBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) +{ + return mz_zip_writer_end_internal(pZip, MZ_TRUE); +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + return mz_zip_add_mem_to_archive_file_in_place_v2(pZip_filename, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, NULL); +} + +mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr) +{ + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + mz_zip_error actual_err = MZ_ZIP_NO_ERROR; + + mz_zip_zero_struct(&zip_archive); + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + { + if (pErr) + *pErr = MZ_ZIP_INVALID_PARAMETER; + return MZ_FALSE; + } + + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + { + if (pErr) + *pErr = MZ_ZIP_INVALID_FILENAME; + return MZ_FALSE; + } + + /* Important: The regular non-64 bit version of stat() can fail here if the file is very large, which could cause the archive to be overwritten. 
*/ + /* So be sure to compile with _LARGEFILE64_SOURCE 1 */ + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) + { + /* Create a new archive. */ + if (!mz_zip_writer_init_file_v2(&zip_archive, pZip_filename, 0, level_and_flags)) + { + if (pErr) + *pErr = zip_archive.m_last_error; + return MZ_FALSE; + } + + created_new_archive = MZ_TRUE; + } + else + { + /* Append to an existing archive. */ + if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0)) + { + if (pErr) + *pErr = zip_archive.m_last_error; + return MZ_FALSE; + } + + if (!mz_zip_writer_init_from_reader_v2(&zip_archive, pZip_filename, level_and_flags)) + { + if (pErr) + *pErr = zip_archive.m_last_error; + + mz_zip_reader_end_internal(&zip_archive, MZ_FALSE); + + return MZ_FALSE; + } + } + + status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); + actual_err = zip_archive.m_last_error; + + /* Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) */ + if (!mz_zip_writer_finalize_archive(&zip_archive)) + { + if (!actual_err) + actual_err = zip_archive.m_last_error; + + status = MZ_FALSE; + } + + if (!mz_zip_writer_end_internal(&zip_archive, status)) + { + if (!actual_err) + actual_err = zip_archive.m_last_error; + + status = MZ_FALSE; + } + + if ((!status) && (created_new_archive)) + { + /* It's a new archive and something went wrong, so just delete it. 
*/ + int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + + if (pErr) + *pErr = actual_err; + + return status; +} + +void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr) +{ + mz_uint32 file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) + *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) + { + if (pErr) + *pErr = MZ_ZIP_INVALID_PARAMETER; + + return NULL; + } + + mz_zip_zero_struct(&zip_archive); + if (!mz_zip_reader_init_file_v2(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY, 0, 0)) + { + if (pErr) + *pErr = zip_archive.m_last_error; + + return NULL; + } + + if (mz_zip_reader_locate_file_v2(&zip_archive, pArchive_name, pComment, flags, &file_index)) + { + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + } + + mz_zip_reader_end_internal(&zip_archive, p != NULL); + + if (pErr) + *pErr = zip_archive.m_last_error; + + return p; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags) +{ + return mz_zip_extract_archive_file_to_heap_v2(pZip_filename, pArchive_name, NULL, pSize, flags, NULL); +} + +#endif /* #ifndef MINIZ_NO_STDIO */ + +#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ + +/* ------------------- Misc utils */ + +mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip) +{ + return pZip ? pZip->m_zip_mode : MZ_ZIP_MODE_INVALID; +} + +mz_zip_type mz_zip_get_type(mz_zip_archive *pZip) +{ + return pZip ? 
pZip->m_zip_type : MZ_ZIP_TYPE_INVALID; +} + +mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num) +{ + mz_zip_error prev_err; + + if (!pZip) + return MZ_ZIP_INVALID_PARAMETER; + + prev_err = pZip->m_last_error; + + pZip->m_last_error = err_num; + return prev_err; +} + +mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip) +{ + if (!pZip) + return MZ_ZIP_INVALID_PARAMETER; + + return pZip->m_last_error; +} + +mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip) +{ + return mz_zip_set_last_error(pZip, MZ_ZIP_NO_ERROR); +} + +mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip) +{ + mz_zip_error prev_err; + + if (!pZip) + return MZ_ZIP_INVALID_PARAMETER; + + prev_err = pZip->m_last_error; + + pZip->m_last_error = MZ_ZIP_NO_ERROR; + return prev_err; +} + +const char *mz_zip_get_error_string(mz_zip_error mz_err) +{ + switch (mz_err) + { + case MZ_ZIP_NO_ERROR: + return "no error"; + case MZ_ZIP_UNDEFINED_ERROR: + return "undefined error"; + case MZ_ZIP_TOO_MANY_FILES: + return "too many files"; + case MZ_ZIP_FILE_TOO_LARGE: + return "file too large"; + case MZ_ZIP_UNSUPPORTED_METHOD: + return "unsupported method"; + case MZ_ZIP_UNSUPPORTED_ENCRYPTION: + return "unsupported encryption"; + case MZ_ZIP_UNSUPPORTED_FEATURE: + return "unsupported feature"; + case MZ_ZIP_FAILED_FINDING_CENTRAL_DIR: + return "failed finding central directory"; + case MZ_ZIP_NOT_AN_ARCHIVE: + return "not a ZIP archive"; + case MZ_ZIP_INVALID_HEADER_OR_CORRUPTED: + return "invalid header or archive is corrupted"; + case MZ_ZIP_UNSUPPORTED_MULTIDISK: + return "unsupported multidisk archive"; + case MZ_ZIP_DECOMPRESSION_FAILED: + return "decompression failed or archive is corrupted"; + case MZ_ZIP_COMPRESSION_FAILED: + return "compression failed"; + case MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE: + return "unexpected decompressed size"; + case MZ_ZIP_CRC_CHECK_FAILED: + return "CRC-32 check failed"; + case MZ_ZIP_UNSUPPORTED_CDIR_SIZE: + return "unsupported 
central directory size"; + case MZ_ZIP_ALLOC_FAILED: + return "allocation failed"; + case MZ_ZIP_FILE_OPEN_FAILED: + return "file open failed"; + case MZ_ZIP_FILE_CREATE_FAILED: + return "file create failed"; + case MZ_ZIP_FILE_WRITE_FAILED: + return "file write failed"; + case MZ_ZIP_FILE_READ_FAILED: + return "file read failed"; + case MZ_ZIP_FILE_CLOSE_FAILED: + return "file close failed"; + case MZ_ZIP_FILE_SEEK_FAILED: + return "file seek failed"; + case MZ_ZIP_FILE_STAT_FAILED: + return "file stat failed"; + case MZ_ZIP_INVALID_PARAMETER: + return "invalid parameter"; + case MZ_ZIP_INVALID_FILENAME: + return "invalid filename"; + case MZ_ZIP_BUF_TOO_SMALL: + return "buffer too small"; + case MZ_ZIP_INTERNAL_ERROR: + return "internal error"; + case MZ_ZIP_FILE_NOT_FOUND: + return "file not found"; + case MZ_ZIP_ARCHIVE_TOO_LARGE: + return "archive is too large"; + case MZ_ZIP_VALIDATION_FAILED: + return "validation failed"; + case MZ_ZIP_WRITE_CALLBACK_FAILED: + return "write calledback failed"; + default: + break; + } + + return "unknown error"; +} + +/* Note: Just because the archive is not zip64 doesn't necessarily mean it doesn't have Zip64 extended information extra field, argh. */ +mz_bool mz_zip_is_zip64(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState)) + return MZ_FALSE; + + return pZip->m_pState->m_zip64; +} + +size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState)) + return 0; + + return pZip->m_pState->m_central_dir.m_size; +} + +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) +{ + return pZip ? 
pZip->m_total_files : 0; +} + +mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip) +{ + if (!pZip) + return 0; + return pZip->m_archive_size; +} + +mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState)) + return 0; + return pZip->m_pState->m_file_archive_start_ofs; +} + +MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip) +{ + if ((!pZip) || (!pZip->m_pState)) + return 0; + return pZip->m_pState->m_pFile; +} + +size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pZip->m_pRead)) + return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + + return pZip->m_pRead(pZip->m_pIO_opaque, file_ofs, pBuf, n); +} + +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size) +{ + mz_uint n; + const mz_uint8 *p = mz_zip_get_cdh(pZip, file_index); + if (!p) + { + if (filename_buf_size) + pFilename[0] = '\0'; + mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER); + return 0; + } + n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); + if (filename_buf_size) + { + n = MZ_MIN(n, filename_buf_size - 1); + memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); + pFilename[n] = '\0'; + } + return n + 1; +} + +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat) +{ + return mz_zip_file_stat_internal(pZip, file_index, mz_zip_get_cdh(pZip, file_index), pStat, NULL); +} + +mz_bool mz_zip_end(mz_zip_archive *pZip) +{ + if (!pZip) + return MZ_FALSE; + + if (pZip->m_zip_mode == MZ_ZIP_MODE_READING) + return mz_zip_reader_end(pZip); +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + else if ((pZip->m_zip_mode == MZ_ZIP_MODE_WRITING) || (pZip->m_zip_mode == MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED)) + return mz_zip_writer_end(pZip); +#endif + + return MZ_FALSE; +} + +#ifdef __cplusplus +} +#endif + +#endif /*#ifndef MINIZ_NO_ARCHIVE_APIS*/ diff 
--git a/kram-profile/Source/miniz.h b/kram-profile/Source/miniz.h new file mode 100644 index 00000000..3100819f --- /dev/null +++ b/kram-profile/Source/miniz.h @@ -0,0 +1,1387 @@ +/* miniz.c 2.1.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateReset/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. 
+ + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. + + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast.
+ + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. + + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done.
This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ +#pragma once + +// Alec add this for now (move to define on projects?) +#if 1 + +// Make sure large file calls are used. Should be set across app. +#define _LARGEFILE64_SOURCE 1 + +// skip crc read checks to speed up reads +#define MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + +// not writing any zip content, just reading it +#define MINIZ_NO_ARCHIVE_WRITING_APIS + +// handling file io separately +#define MINIZ_NO_STDIO + +// These defines annoying conflict with everything (f.e. 
compress) +#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif + + + +/* Defines to completely disable specific portions of miniz.c: + If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. */ + +/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */ +/*#define MINIZ_NO_STDIO */ + +/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */ +/* get/set file times, and the C run-time funcs that get/set times won't be called. */ +/* The current downside is the times written to your archives will be from 1979. */ +/*#define MINIZ_NO_TIME */ + +/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */ +/*#define MINIZ_NO_ARCHIVE_APIS */ + +/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */ +/*#define MINIZ_NO_ARCHIVE_WRITING_APIS */ + +/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */ +/*#define MINIZ_NO_ZLIB_APIS */ + +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. */ +/*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ + +/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. + Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc + callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user + functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. 
*/ +/*#define MINIZ_NO_MALLOC */ + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) +/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */ +#define MINIZ_NO_TIME +#endif + +#include + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) +#include +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */ +#define MINIZ_X86_OR_X64_CPU 1 +#else +#define MINIZ_X86_OR_X64_CPU 0 +#endif + +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */ +#define MINIZ_LITTLE_ENDIAN 1 +#else +#define MINIZ_LITTLE_ENDIAN 0 +#endif + +/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */ +#if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES) +#if MINIZ_X86_OR_X64_CPU +/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. */ +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#define MINIZ_UNALIGNED_USE_MEMCPY +#else +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0 +#endif +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */ +#define MINIZ_HAS_64BIT_REGISTERS 1 +#else +#define MINIZ_HAS_64BIT_REGISTERS 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* ------------------- zlib-style API Definitions. */ + +/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! 
*/ +typedef unsigned long mz_ulong; + +/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */ +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */ +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */ +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +/* Compression strategies. */ +enum +{ + MZ_DEFAULT_STRATEGY = 0, + MZ_FILTERED = 1, + MZ_HUFFMAN_ONLY = 2, + MZ_RLE = 3, + MZ_FIXED = 4 +}; + +/* Method */ +#define MZ_DEFLATED 8 + +/* Heap allocation callbacks. +Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long. */ +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */ +enum +{ + MZ_NO_COMPRESSION = 0, + MZ_BEST_SPEED = 1, + MZ_BEST_COMPRESSION = 9, + MZ_UBER_COMPRESSION = 10, + MZ_DEFAULT_LEVEL = 6, + MZ_DEFAULT_COMPRESSION = -1 +}; + +#define MZ_VERSION "10.1.0" +#define MZ_VERNUM 0xA100 +#define MZ_VER_MAJOR 10 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 0 +#define MZ_VER_SUBREVISION 0 + +#ifndef MINIZ_NO_ZLIB_APIS + +/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
*/ +enum +{ + MZ_NO_FLUSH = 0, + MZ_PARTIAL_FLUSH = 1, + MZ_SYNC_FLUSH = 2, + MZ_FULL_FLUSH = 3, + MZ_FINISH = 4, + MZ_BLOCK = 5 +}; + +/* Return status codes. MZ_PARAM_ERROR is non-standard. */ +enum +{ + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +/* Window bits */ +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +/* Compression/decompression stream struct. */ +typedef struct mz_stream_s +{ + const unsigned char *next_in; /* pointer to next byte to read */ + unsigned int avail_in; /* number of bytes available at next_in */ + mz_ulong total_in; /* total number of bytes consumed so far */ + + unsigned char *next_out; /* pointer to next byte to write */ + unsigned int avail_out; /* number of bytes that can be written to next_out */ + mz_ulong total_out; /* total number of bytes produced so far */ + + char *msg; /* error msg (unused) */ + struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */ + + mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */ + mz_free_func zfree; /* optional heap free function (defaults to free) */ + void *opaque; /* heap alloc function user pointer */ + + int data_type; /* data_type (unused) */ + mz_ulong adler; /* adler32 of the source or uncompressed data */ + mz_ulong reserved; /* not used */ +} mz_stream; + +typedef mz_stream *mz_streamp; + +/* Returns the version string of miniz.c. */ +const char *mz_version(void); + +/* mz_deflateInit() initializes a compressor with default options: */ +/* Parameters: */ +/* pStream must point to an initialized mz_stream struct. */ +/* level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */ +/* level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */ +/* (This special func. 
is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_PARAM_ERROR if the input parameters are bogus. */ +/* MZ_MEM_ERROR on out of memory. */ +int mz_deflateInit(mz_streamp pStream, int level); + +/* mz_deflateInit2() is like mz_deflateInit(), except with more control: */ +/* Additional parameters: */ +/* method must be MZ_DEFLATED */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */ +/* mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */ +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */ +int mz_deflateReset(mz_streamp pStream); + +/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */ +/* Parameters: */ +/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */ +/* Return values: */ +/* MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */ +/* MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. 
(Fill up the input buffer or free up some output space and try again.) */ +int mz_deflate(mz_streamp pStream, int flush); + +/* mz_deflateEnd() deinitializes a compressor: */ +/* Return values: */ +/* MZ_OK on success. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +int mz_deflateEnd(mz_streamp pStream); + +/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */ +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +/* Single-call compression functions mz_compress() and mz_compress2(): */ +/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */ +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */ +mz_ulong mz_compressBound(mz_ulong source_len); + +/* Initializes a decompressor. */ +int mz_inflateInit(mz_streamp pStream); + +/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */ +/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */ +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +/* Quickly resets a decompressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */ +int mz_inflateReset(mz_streamp pStream); + +/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. 
*/ +/* Parameters: */ +/* pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */ +/* flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */ +/* On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */ +/* MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */ +/* Return values: */ +/* MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */ +/* MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */ +/* MZ_STREAM_ERROR if the stream is bogus. */ +/* MZ_DATA_ERROR if the deflate stream is invalid. */ +/* MZ_PARAM_ERROR if one of the parameters is invalid. */ +/* MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */ +/* with more input data, or with more room in the output buffer (except when using single call decompression, described above). */ +int mz_inflate(mz_streamp pStream, int flush); + +/* Deinitializes a decompressor. */ +int mz_inflateEnd(mz_streamp pStream); + +/* Single-call decompression. */ +/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */ +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +/* Returns a string description of the specified error code, or NULL if the error code is invalid. 
*/ +const char *mz_error(int err); + +/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */ +/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */ +#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES +typedef unsigned char Byte; +typedef unsigned int uInt; +typedef mz_ulong uLong; +typedef Byte Bytef; +typedef uInt uIntf; +typedef char charf; +typedef int intf; +typedef void *voidpf; +typedef uLong uLongf; +typedef void *voidp; +typedef void *const voidpc; +#define Z_NULL 0 +#define Z_NO_FLUSH MZ_NO_FLUSH +#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH +#define Z_SYNC_FLUSH MZ_SYNC_FLUSH +#define Z_FULL_FLUSH MZ_FULL_FLUSH +#define Z_FINISH MZ_FINISH +#define Z_BLOCK MZ_BLOCK +#define Z_OK MZ_OK +#define Z_STREAM_END MZ_STREAM_END +#define Z_NEED_DICT MZ_NEED_DICT +#define Z_ERRNO MZ_ERRNO +#define Z_STREAM_ERROR MZ_STREAM_ERROR +#define Z_DATA_ERROR MZ_DATA_ERROR +#define Z_MEM_ERROR MZ_MEM_ERROR +#define Z_BUF_ERROR MZ_BUF_ERROR +#define Z_VERSION_ERROR MZ_VERSION_ERROR +#define Z_PARAM_ERROR MZ_PARAM_ERROR +#define Z_NO_COMPRESSION MZ_NO_COMPRESSION +#define Z_BEST_SPEED MZ_BEST_SPEED +#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION +#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION +#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY +#define Z_FILTERED MZ_FILTERED +#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY +#define Z_RLE MZ_RLE +#define Z_FIXED MZ_FIXED +#define Z_DEFLATED MZ_DEFLATED +#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS +#define alloc_func mz_alloc_func +#define free_func mz_free_func +#define internal_state mz_internal_state +#define z_stream mz_stream +#define deflateInit mz_deflateInit +#define deflateInit2 mz_deflateInit2 +#define deflateReset mz_deflateReset +#define deflate mz_deflate +#define deflateEnd mz_deflateEnd +#define deflateBound mz_deflateBound +#define compress mz_compress +#define compress2 
mz_compress2 +#define compressBound mz_compressBound +#define inflateInit mz_inflateInit +#define inflateInit2 mz_inflateInit2 +#define inflateReset mz_inflateReset +#define inflate mz_inflate +#define inflateEnd mz_inflateEnd +#define uncompress mz_uncompress +#define crc32 mz_crc32 +#define adler32 mz_adler32 +#define MAX_WBITS 15 +#define MAX_MEM_LEVEL 9 +#define zError mz_error +#define ZLIB_VERSION MZ_VERSION +#define ZLIB_VERNUM MZ_VERNUM +#define ZLIB_VER_MAJOR MZ_VER_MAJOR +#define ZLIB_VER_MINOR MZ_VER_MINOR +#define ZLIB_VER_REVISION MZ_VER_REVISION +#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION +#define zlibVersion mz_version +#define zlib_version mz_version() +#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */ + +#endif /* MINIZ_NO_ZLIB_APIS */ + +#ifdef __cplusplus +} +#endif +#pragma once +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +/* ------------------- Types and macros */ +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef int64_t mz_int64; +typedef uint64_t mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */ +#ifdef _MSC_VER +#define MZ_MACRO_END while (0, 0) +#else +#define MZ_MACRO_END while (0) +#endif + +#ifdef MINIZ_NO_STDIO +#define MZ_FILE void * +#else +#include <stdio.h> +#define MZ_FILE FILE +#endif /* #ifdef MINIZ_NO_STDIO */ + +#ifdef MINIZ_NO_TIME +typedef struct mz_dummy_time_t_tag +{ + int m_dummy; +} mz_dummy_time_t; +#define MZ_TIME_T mz_dummy_time_t +#else +#define MZ_TIME_T time_t +#endif + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC +#define MZ_MALLOC(x) NULL +#define MZ_FREE(x) (void)x, ((void)0) +#define MZ_REALLOC(p, x) NULL +#else +#define MZ_MALLOC(x) malloc(x) +#define MZ_FREE(x) free(x) +#define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a, b) (((a) > (b)) ? 
(a) : (b)) +#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) +#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else +#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) +#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U)) + +#ifdef _MSC_VER +#define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__)) +#else +#define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +extern void *miniz_def_alloc_func(void *opaque, size_t items, size_t size); +extern void miniz_def_free_func(void *opaque, void *address); +extern void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size); + +#define MZ_UINT16_MAX (0xFFFFU) +#define MZ_UINT32_MAX (0xFFFFFFFFU) + +#ifdef __cplusplus +} +#endif +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif +/* ------------------- Low-level Compression API Definitions */ + +/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */ +#define TDEFL_LESS_MEMORY 0 + +/* tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): */ +/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). 
*/ +enum +{ + TDEFL_HUFFMAN_ONLY = 0, + TDEFL_DEFAULT_MAX_PROBES = 128, + TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */ +/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */ +/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */ +/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */ +/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */ +/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */ +/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */ +/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */ +/* The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). */ +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +/* High level compression functions: */ +/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of source block to compress. */ +/* flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. 
*/ +/* *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must free() the returned block when it's no longer needed. */ +void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */ +/* Returns 0 on failure. */ +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +/* Compresses an image to a compressed PNG file in memory. */ +/* On entry: */ +/* pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */ +/* The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */ +/* level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */ +/* If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */ +/* On return: */ +/* Function returns a pointer to the compressed data, or NULL on failure. */ +/* *pLen_out will be set to the size of the PNG image file. */ +/* The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */ +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. */ +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); + +/* tdefl_compress_mem_to_output() compresses a block to an output stream. 
The above helpers use this function internally. */ +mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum +{ + TDEFL_MAX_HUFF_TABLES = 3, + TDEFL_MAX_HUFF_SYMBOLS_0 = 288, + TDEFL_MAX_HUFF_SYMBOLS_1 = 32, + TDEFL_MAX_HUFF_SYMBOLS_2 = 19, + TDEFL_LZ_DICT_SIZE = 32768, + TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, + TDEFL_MIN_MATCH_LEN = 3, + TDEFL_MAX_MATCH_LEN = 258 +}; + +/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */ +#if TDEFL_LESS_MEMORY +enum +{ + TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 12, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#else +enum +{ + TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, + TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, + TDEFL_MAX_HUFF_SYMBOLS = 288, + TDEFL_LZ_HASH_BITS = 15, + TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, + TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, + TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS +}; +#endif + +/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */ +typedef enum { + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1 +} tdefl_status; + +/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums */ +typedef enum { + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +/* tdefl's compression state structure. 
*/ +typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +/* Initializes the compressor. */ +/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */ +/* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */ +/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */ +/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
*/ +tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */ +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */ +/* tdefl_compress_buffer() always consumes the entire input buffer. */ +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +/* Create tdefl_compress() flags given zlib-style compression parameters. */ +/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */ +/* window_bits may be -15 (raw deflate) or 15 (zlib) */ +/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */ +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tdefl_compressor structure in C so that */ +/* non-C language bindings to tdefl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tdefl_compressor *tdefl_compressor_alloc(void); +void tdefl_compressor_free(tdefl_compressor *pComp); +#endif + +#ifdef __cplusplus +} +#endif +#pragma once + +/* ------------------- Low-level Decompression API Definitions */ + +#ifdef __cplusplus +extern "C" { +#endif +/* Decompression flags used by tinfl_decompress(). */ +/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). 
Otherwise, the input is a raw deflate stream. */ +/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */ +/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */ +/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */ +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +/* High level decompression functions: */ +/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */ +/* On entry: */ +/* pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */ +/* On return: */ +/* Function returns a pointer to the decompressed data, or NULL on failure. */ +/* *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */ +/* The caller must call mz_free() on the returned block when it's no longer needed. */ +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */ +/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */ +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. 
*/ +/* Returns 1 on success or 0 on failure. */ +typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; +typedef struct tinfl_decompressor_tag tinfl_decompressor; + +#ifndef MINIZ_NO_MALLOC +/* Allocate the tinfl_decompressor structure in C so that */ +/* non-C language bindings to tinfl_ API don't need to worry about */ +/* structure size and allocation mechanism. */ +tinfl_decompressor *tinfl_decompressor_alloc(void); +void tinfl_decompressor_free(tinfl_decompressor *pDecomp); +#endif + +/* Max size of LZ dictionary. */ +#define TINFL_LZ_DICT_SIZE 32768 + +/* Return status. */ +typedef enum { + /* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */ + /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */ + /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */ + TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4, + + /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */ + TINFL_STATUS_BAD_PARAM = -3, + + /* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */ + TINFL_STATUS_ADLER32_MISMATCH = -2, + + /* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it'll just keep on returning the same status failure code. 
*/ + TINFL_STATUS_FAILED = -1, + + /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */ + + /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */ + /* if zlib headers and adler32 checking enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */ + TINFL_STATUS_DONE = 0, + + /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */ + /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */ + /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */ + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + + /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */ + /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */ + /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */ + /* so I may need to add some code to address this. */ + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +/* Initializes the decompressor to its initial state. */ +#define tinfl_init(r) \ + do \ + { \ + (r)->m_state = 0; \ + } \ + MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +/* Main low-level decompressor coroutine function. 
This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */ +/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */ +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +/* Internal/private bits follow. */ +enum +{ + TINFL_MAX_HUFF_TABLES = 3, + TINFL_MAX_HUFF_SYMBOLS_0 = 288, + TINFL_MAX_HUFF_SYMBOLS_1 = 32, + TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, + TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS +#define TINFL_USE_64BIT_BITBUF 1 +#else +#define TINFL_USE_64BIT_BITBUF 0 +#endif + +#if TINFL_USE_64BIT_BITBUF +typedef mz_uint64 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (64) +#else +typedef mz_uint32 tinfl_bit_buf_t; +#define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +#ifdef __cplusplus +} +#endif + +#pragma once + + +/* ------------------- ZIP archive reading/writing */ + +#ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +extern "C" { +#endif + +enum +{ + /* Note: These enums can be reduced as needed to save memory or stack space - they are 
pretty conservative. */ + MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512 +}; + +typedef struct +{ + /* Central directory file index. */ + mz_uint32 m_file_index; + + /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */ + mz_uint64 m_central_dir_ofs; + + /* These fields are copied directly from the zip's central dir. */ + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; + +#ifndef MINIZ_NO_TIME + MZ_TIME_T m_time; +#endif + + /* CRC-32 of uncompressed data. */ + mz_uint32 m_crc32; + + /* File's compressed size. */ + mz_uint64 m_comp_size; + + /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */ + mz_uint64 m_uncomp_size; + + /* Zip internal and external file attributes. */ + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + + /* Entry's local header file offset in bytes. */ + mz_uint64 m_local_header_ofs; + + /* Size of comment in bytes. */ + mz_uint32 m_comment_size; + + /* MZ_TRUE if the entry appears to be a directory. */ + mz_bool m_is_directory; + + /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */ + mz_bool m_is_encrypted; + + /* MZ_TRUE if the file is not encrypted, a patch file, and if it uses a compression method we support. */ + mz_bool m_is_supported; + + /* Filename. If string ends in '/' it's a subdirectory entry. */ + /* Guaranteed to be zero terminated, may be truncated to fit. */ + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + + /* Comment field. */ + /* Guaranteed to be zero terminated, may be truncated to fit. 
*/ + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; + +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n); +typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum { + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef enum { + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800, + MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as it's validated to ensure the func finds the file in the central dir (intended for testing) */ + MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000, /* validate the local headers, but don't decompress the entire file and check the crc32 */ + MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000, /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */ + MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000, + MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000 +} mz_zip_flags; + +typedef enum { + MZ_ZIP_TYPE_INVALID = 0, + MZ_ZIP_TYPE_USER, + MZ_ZIP_TYPE_MEMORY, + MZ_ZIP_TYPE_HEAP, + MZ_ZIP_TYPE_FILE, + MZ_ZIP_TYPE_CFILE, + MZ_ZIP_TOTAL_TYPES +} mz_zip_type; + +/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum.
*/ +typedef enum { + MZ_ZIP_NO_ERROR = 0, + MZ_ZIP_UNDEFINED_ERROR, + MZ_ZIP_TOO_MANY_FILES, + MZ_ZIP_FILE_TOO_LARGE, + MZ_ZIP_UNSUPPORTED_METHOD, + MZ_ZIP_UNSUPPORTED_ENCRYPTION, + MZ_ZIP_UNSUPPORTED_FEATURE, + MZ_ZIP_FAILED_FINDING_CENTRAL_DIR, + MZ_ZIP_NOT_AN_ARCHIVE, + MZ_ZIP_INVALID_HEADER_OR_CORRUPTED, + MZ_ZIP_UNSUPPORTED_MULTIDISK, + MZ_ZIP_DECOMPRESSION_FAILED, + MZ_ZIP_COMPRESSION_FAILED, + MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE, + MZ_ZIP_CRC_CHECK_FAILED, + MZ_ZIP_UNSUPPORTED_CDIR_SIZE, + MZ_ZIP_ALLOC_FAILED, + MZ_ZIP_FILE_OPEN_FAILED, + MZ_ZIP_FILE_CREATE_FAILED, + MZ_ZIP_FILE_WRITE_FAILED, + MZ_ZIP_FILE_READ_FAILED, + MZ_ZIP_FILE_CLOSE_FAILED, + MZ_ZIP_FILE_SEEK_FAILED, + MZ_ZIP_FILE_STAT_FAILED, + MZ_ZIP_INVALID_PARAMETER, + MZ_ZIP_INVALID_FILENAME, + MZ_ZIP_BUF_TOO_SMALL, + MZ_ZIP_INTERNAL_ERROR, + MZ_ZIP_FILE_NOT_FOUND, + MZ_ZIP_ARCHIVE_TOO_LARGE, + MZ_ZIP_VALIDATION_FAILED, + MZ_ZIP_WRITE_CALLBACK_FAILED, + MZ_ZIP_TOTAL_ERRORS +} mz_zip_error; + +typedef struct mz_zip_archive +{ + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + + /* We only support up to UINT32_MAX files in zip64 mode. 
*/ + mz_uint32 m_total_files; + mz_zip_mode m_zip_mode; + mz_zip_type m_zip_type; + mz_zip_error m_last_error; + + mz_uint64 m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + mz_file_needs_keepalive m_pNeeds_keepalive; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef struct +{ + mz_zip_archive *pZip; + mz_uint flags; + + int status; +#ifndef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS + mz_uint file_crc32; +#endif + mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs; + mz_zip_archive_file_stat file_stat; + void *pRead_buf; + void *pWrite_buf; + + size_t out_blk_remain; + + tinfl_decompressor inflator; + +} mz_zip_reader_extract_iter_state; + +// this is purely for looking at struct in debugger +typedef struct { + mz_uint32 local_file_signature; // 0x04034b50 read as LE number + mz_uint16 version; + mz_uint16 bit_flags; + mz_uint16 compression_method; + mz_uint16 modification_time; + mz_uint16 modification_date; // ugh, +2 bytes throws off alignment of remaining fields + mz_uint16 crc32a; // of uncompressed data + mz_uint16 crc32b; // of uncompressed data + mz_uint16 compressed_sizea; // 0xffff for zip64 + mz_uint16 compressed_sizeb; // 0xffff for zip64 + mz_uint16 uncompressed_sizea; // 0xffff for zip64 + mz_uint16 uncompressed_sizeb; // 0xffff for zip64 + mz_uint16 filename_length; + mz_uint16 extra_field_length; + // filename + // extra field +} mz_local_file_header; + +/* -------- ZIP reading */ + +/* Inits a ZIP archive reader. */ +/* These functions read and validate the archive's central directory.
*/ +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags); + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +/* Read an archive from a disk file. */ +/* file_start_ofs is the file offset where the archive actually begins, or 0. */ +/* archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */ +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size); + +/* Read an archive from an already opened FILE, beginning at the current file position. */ +/* The archive is assumed to be archive_size bytes long. If archive_size is 0 (it is unsigned, so it can never be < 0), then the entire rest of the file is assumed to contain the archive. */ +/* The FILE will NOT be closed when mz_zip_reader_end() is called. */ +mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags); +#endif + +/* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */ +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +/* -------- ZIP reading or writing */ + +/* Clears a mz_zip_archive struct to all zeros. */ +/* Important: This must be done before passing the struct to any mz_zip functions. */ +void mz_zip_zero_struct(mz_zip_archive *pZip); + +mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip); +mz_zip_type mz_zip_get_type(mz_zip_archive *pZip); + +/* Returns the total number of files in the archive.
*/ +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip); +mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip); +MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip); + +/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */ +size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n); + +/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */ +/* Note that the m_last_error functionality is not thread safe. */ +mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num); +mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip); +mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip); +mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip); +const char *mz_zip_get_error_string(mz_zip_error mz_err); + +/* MZ_TRUE if the archive file entry is a directory entry. */ +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); + +/* MZ_TRUE if the file is encrypted/strong encrypted. */ +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); + +/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */ +mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index); + +/* Retrieves the filename of an archive file entry. */ +/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */ +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); + +/* Attempts to locate a file in the archive's central directory. */ +/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */ +/* Returns -1 if the file cannot be found.
*/ +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); +int mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index); + +/* Returns detailed information about an archive file entry. */ +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); + +/* MZ_TRUE if the file is in zip64 format. */ +/* A file is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */ +mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); + +/* Returns the total central directory size in bytes. */ +/* The current max supported size is <= MZ_UINT32_MAX. */ +size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); + +/* Alec change - if files are sorted by filename, then this returns the remap table for each fileIndex */ +/* This was previously internal state, so use with caution. It's an array of mz_uint32 */ +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip); + +/* Extracts an archive file to a memory buffer using no memory allocation. */ +/* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +/* Extracts an archive file to a memory buffer.
*/ +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); + +/* Extracts an archive file to a dynamically allocated heap buffer. */ +/* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */ +/* Returns NULL and sets the last error on failure. */ +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); + +/* Extracts an archive file using a callback function to output the file's data. */ +mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + +/* Extract a file iteratively */ +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); +mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); +size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size); +mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState); + +#ifndef MINIZ_NO_STDIO +/* Extracts an archive file to a disk file and sets its last accessed and modified times. */ +/* This function only extracts files, not archive directory records.
*/ +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); + +/* Extracts an archive file starting at the current position in the destination FILE stream. */ +mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags); +#endif + +#if 0 +/* TODO */ + typedef void *mz_zip_streaming_extract_state_ptr; + mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); + uint64_t mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); + uint64_t mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); + mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, uint64_t new_ofs); + size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size); + mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState); +#endif + +/* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */ +/* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */ +mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags); + +/* Validates an entire archive by calling mz_zip_validate_file() on each file.
*/ +mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags); + +/* Misc utils/helpers, valid for ZIP reading or writing */ +mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr); +mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr); + +/* For memory-mapped archive, can directly access data using offset into the archive. No reads needed. */ +size_t mz_zip_reader_get_raw_data_offset(mz_zip_archive *pZip, mz_uint file_index); + +/* Return the start of the raw data, NULL if error */ +const uint8_t* mz_zip_reader_get_raw_data(mz_zip_archive *pZip, mz_uint file_index); + + +/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */ +mz_bool mz_zip_end(mz_zip_archive *pZip); + +/* -------- ZIP writing */ + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +/* Inits a ZIP archive writer. */ +/*Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2*/ +/*The output is streamable, i.e. 
file_ofs in mz_file_write_func always increases only by n*/ +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags); + +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); +mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags); +mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags); +#endif + +/* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */ +/* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */ +/* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */ +/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. */ +/* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */ +/* the archive is finalized the file's central directory will be hosed. 
*/ +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); +mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags); + +/* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */ +/* To add a directory entry, call this method with an archive name ending in a forwardslash with an empty buffer. */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); + +/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */ +/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. */ +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); + +mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, + mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len, + const char *user_extra_data_central, mz_uint user_extra_data_central_len); + +/* Adds the contents of a file to an archive. This function also records the disk file's modified time into the archive. */ +/* File data is supplied via a read callback function. 
Use mz_zip_writer_add_(c)file to add a file directly.*/ +mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 size_to_add, + const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, + const char *user_extra_data_central, mz_uint user_extra_data_central_len); + +#ifndef MINIZ_NO_STDIO +/* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + +/* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. */ +mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 size_to_add, + const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len, + const char *user_extra_data_central, mz_uint user_extra_data_central_len); +#endif + +/* Adds a file to an archive by fully cloning the data from another archive. */ +/* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data.
*/ +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index); + +/* Finalizes the archive by writing the central directory records followed by the end of central directory record. */ +/* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */ +/* An archive must be manually finalized by calling this function for it to be valid. */ +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); + +/* Finalizes a heap archive, returning a pointer to the heap block and its size. */ +/* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */ +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize); + +/* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */ +/* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */ +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +/* -------- Misc. high-level helper functions: */ + +/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */ +/* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */ +/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */ +/* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong.
*/ +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr); + +/* Reads a single file from an archive into a heap block. */ +/* If pComment is not NULL, only the file with the specified comment will be extracted. */ +/* Returns NULL on failure. */ +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags); +void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr); + +#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */ + +#ifdef __cplusplus +} +#endif + +#endif /* MINIZ_NO_ARCHIVE_APIS */ diff --git a/kram-profile/Source/track_event_parser.cpp b/kram-profile/Source/track_event_parser.cpp new file mode 100644 index 00000000..1da63ca1 --- /dev/null +++ b/kram-profile/Source/track_event_parser.cpp @@ -0,0 +1,1803 @@ +#if 0 // just for reference +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/trace_processor/importers/proto/track_event_parser.h" + +#include +#include +#include + +#include "perfetto/base/logging.h" +#include "perfetto/ext/base/base64.h" +#include "perfetto/ext/base/string_writer.h" +#include "perfetto/trace_processor/status.h" +#include "protos/perfetto/common/android_log_constants.pbzero.h" +#include "protos/perfetto/trace/extension_descriptor.pbzero.h" +#include "protos/perfetto/trace/interned_data/interned_data.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_active_processes.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_compositor_scheduler_state.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_histogram_sample.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_legacy_ipc.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_process_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/chrome_thread_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/counter_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/debug_annotation.pbzero.h" +#include "protos/perfetto/trace/track_event/log_message.pbzero.h" +#include "protos/perfetto/trace/track_event/process_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/source_location.pbzero.h" +#include "protos/perfetto/trace/track_event/task_execution.pbzero.h" +#include "protos/perfetto/trace/track_event/thread_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/track_descriptor.pbzero.h" +#include "protos/perfetto/trace/track_event/track_event.pbzero.h" +#include "src/trace_processor/importers/common/args_tracker.h" +#include "src/trace_processor/importers/common/args_translation_table.h" +#include "src/trace_processor/importers/common/event_tracker.h" +#include "src/trace_processor/importers/common/flow_tracker.h" +#include "src/trace_processor/importers/common/process_tracker.h" +#include "src/trace_processor/importers/common/track_tracker.h" 
+#include "src/trace_processor/importers/json/json_utils.h" +#include "src/trace_processor/importers/proto/packet_analyzer.h" +#include "src/trace_processor/importers/proto/packet_sequence_state.h" +#include "src/trace_processor/importers/proto/profile_packet_utils.h" +#include "src/trace_processor/importers/proto/stack_profile_sequence_state.h" +#include "src/trace_processor/importers/proto/track_event_tracker.h" +#include "src/trace_processor/util/debug_annotation_parser.h" +#include "src/trace_processor/util/proto_to_args_parser.h" +#include "src/trace_processor/util/status_macros.h" + +namespace perfetto { +namespace trace_processor { + +namespace { +using BoundInserter = ArgsTracker::BoundInserter; +using protos::pbzero::TrackEvent; +using LegacyEvent = TrackEvent::LegacyEvent; +using protozero::ConstBytes; + +// Slices which have been opened but haven't been closed yet will be marked +// with these placeholder values. +constexpr int64_t kPendingThreadDuration = -1; +constexpr int64_t kPendingThreadInstructionDelta = -1; + +class TrackEventArgsParser : public util::ProtoToArgsParser::Delegate { + public: + TrackEventArgsParser(int64_t packet_timestamp, + BoundInserter& inserter, + TraceStorage& storage, + PacketSequenceStateGeneration& sequence_state) + : packet_timestamp_(packet_timestamp), + inserter_(inserter), + storage_(storage), + sequence_state_(sequence_state) {} + + ~TrackEventArgsParser() override; + + using Key = util::ProtoToArgsParser::Key; + + void AddInteger(const Key& key, int64_t value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::Integer(value)); + } + void AddUnsignedInteger(const Key& key, uint64_t value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::UnsignedInteger(value)); + } + void AddString(const Key& key, const protozero::ConstChars& 
value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::String(storage_.InternString(value))); + } + void AddString(const Key& key, const std::string& value) final { + inserter_.AddArg( + storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::String(storage_.InternString(base::StringView(value)))); + } + void AddDouble(const Key& key, double value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::Real(value)); + } + void AddPointer(const Key& key, const void* value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::Pointer(reinterpret_cast(value))); + } + void AddBoolean(const Key& key, bool value) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::Boolean(value)); + } + void AddBytes(const Key& key, const protozero::ConstBytes& value) final { + std::string b64_data = base::Base64Encode(value.data, value.size); + AddString(key, b64_data); + } + bool AddJson(const Key& key, const protozero::ConstChars& value) final { + auto json_value = json::ParseJsonString(value); + if (!json_value) + return false; + return json::AddJsonValueToArgs(*json_value, base::StringView(key.flat_key), + base::StringView(key.key), &storage_, + &inserter_); + } + void AddNull(const Key& key) final { + inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)), + storage_.InternString(base::StringView(key.key)), + Variadic::Null()); + } + + size_t GetArrayEntryIndex(const std::string& array_key) final { + return inserter_.GetNextArrayEntryIndex( + storage_.InternString(base::StringView(array_key))); + } + + size_t 
IncrementArrayEntryIndex(const std::string& array_key) final { + return inserter_.IncrementArrayEntryIndex( + storage_.InternString(base::StringView(array_key))); + } + + InternedMessageView* GetInternedMessageView(uint32_t field_id, + uint64_t iid) final { + return sequence_state_.GetInternedMessageView(field_id, iid); + } + + int64_t packet_timestamp() final { return packet_timestamp_; } + + PacketSequenceStateGeneration* seq_state() final { return &sequence_state_; } + + private: + int64_t packet_timestamp_; + BoundInserter& inserter_; + TraceStorage& storage_; + PacketSequenceStateGeneration& sequence_state_; +}; + +TrackEventArgsParser::~TrackEventArgsParser() = default; + +// Paths on Windows use backslash rather than slash as a separator. +// Normalise the paths by replacing backslashes with slashes to make it +// easier to write cross-platform scripts. +std::string NormalizePathSeparators(const protozero::ConstChars& path) { + std::string result(path.data, path.size); + for (char& c : result) { + if (c == '\\') + c = '/'; + } + return result; +} + +std::optional MaybeParseUnsymbolizedSourceLocation( + std::string prefix, + const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + auto* decoder = delegate.GetInternedMessage( + protos::pbzero::InternedData::kUnsymbolizedSourceLocations, + field.as_uint64()); + if (!decoder) { + // Lookup failed fall back on default behaviour which will just put + // the iid into the args table. + return std::nullopt; + } + // Interned mapping_id loses it's meaning when the sequence ends. So we need + // to get an id from stack_profile_mapping table. 
+ auto mapping_id = delegate.seq_state() + ->GetOrCreate() + ->FindOrInsertMapping(decoder->mapping_id()); + if (!mapping_id) { + return std::nullopt; + } + delegate.AddUnsignedInteger( + util::ProtoToArgsParser::Key(prefix + ".mapping_id"), mapping_id->value); + delegate.AddUnsignedInteger(util::ProtoToArgsParser::Key(prefix + ".rel_pc"), + decoder->rel_pc()); + return base::OkStatus(); +} + +std::optional MaybeParseSourceLocation( + std::string prefix, + const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + auto* decoder = delegate.GetInternedMessage( + protos::pbzero::InternedData::kSourceLocations, field.as_uint64()); + if (!decoder) { + // Lookup failed fall back on default behaviour which will just put + // the source_location_iid into the args table. + return std::nullopt; + } + + delegate.AddString(util::ProtoToArgsParser::Key(prefix + ".file_name"), + NormalizePathSeparators(decoder->file_name())); + delegate.AddString(util::ProtoToArgsParser::Key(prefix + ".function_name"), + decoder->function_name()); + if (decoder->has_line_number()) { + delegate.AddInteger(util::ProtoToArgsParser::Key(prefix + ".line_number"), + decoder->line_number()); + } + + return base::OkStatus(); +} + +protos::pbzero::AndroidLogPriority ToAndroidLogPriority( + protos::pbzero::LogMessage::Priority prio) { + switch (prio) { + case protos::pbzero::LogMessage::Priority::PRIO_UNSPECIFIED: + return protos::pbzero::AndroidLogPriority::PRIO_UNSPECIFIED; + case protos::pbzero::LogMessage::Priority::PRIO_UNUSED: + return protos::pbzero::AndroidLogPriority::PRIO_UNUSED; + case protos::pbzero::LogMessage::Priority::PRIO_VERBOSE: + return protos::pbzero::AndroidLogPriority::PRIO_VERBOSE; + case protos::pbzero::LogMessage::Priority::PRIO_DEBUG: + return protos::pbzero::AndroidLogPriority::PRIO_DEBUG; + case protos::pbzero::LogMessage::Priority::PRIO_INFO: + return protos::pbzero::AndroidLogPriority::PRIO_INFO; + case protos::pbzero::LogMessage::Priority::PRIO_WARN: 
+ return protos::pbzero::AndroidLogPriority::PRIO_WARN; + case protos::pbzero::LogMessage::Priority::PRIO_ERROR: + return protos::pbzero::AndroidLogPriority::PRIO_ERROR; + case protos::pbzero::LogMessage::Priority::PRIO_FATAL: + return protos::pbzero::AndroidLogPriority::PRIO_FATAL; + } + return protos::pbzero::AndroidLogPriority::PRIO_UNSPECIFIED; +} + +} // namespace + +class TrackEventParser::EventImporter { + public: + EventImporter(TrackEventParser* parser, + int64_t ts, + const TrackEventData* event_data, + ConstBytes blob, + uint32_t packet_sequence_id) + : context_(parser->context_), + track_event_tracker_(parser->track_event_tracker_), + storage_(context_->storage.get()), + parser_(parser), + args_translation_table_(context_->args_translation_table.get()), + ts_(ts), + event_data_(event_data), + sequence_state_(event_data->trace_packet_data.sequence_state.get()), + blob_(std::move(blob)), + event_(blob_), + legacy_event_(event_.legacy_event()), + defaults_(event_data->trace_packet_data.sequence_state + ->GetTrackEventDefaults()), + thread_timestamp_(event_data->thread_timestamp), + thread_instruction_count_(event_data->thread_instruction_count), + packet_sequence_id_(packet_sequence_id) {} + + util::Status Import() { + // TODO(eseckler): This legacy event field will eventually be replaced by + // fields in TrackEvent itself. 
+ if (PERFETTO_UNLIKELY(!event_.type() && !legacy_event_.has_phase())) + return util::ErrStatus("TrackEvent without type or phase"); + + category_id_ = ParseTrackEventCategory(); + name_id_ = ParseTrackEventName(); + + if (context_->content_analyzer) { + PacketAnalyzer::SampleAnnotation annotation; + annotation.push_back({parser_->event_category_key_id_, category_id_}); + annotation.push_back({parser_->event_name_key_id_, name_id_}); + PacketAnalyzer::Get(context_)->ProcessPacket( + event_data_->trace_packet_data.packet, annotation); + } + + RETURN_IF_ERROR(ParseTrackAssociation()); + + // Counter-type events don't support arguments (those are on the + // CounterDescriptor instead). All they have is a |{double_,}counter_value|. + if (event_.type() == TrackEvent::TYPE_COUNTER) { + ParseCounterEvent(); + return util::OkStatus(); + } + + // If we have legacy thread time / instruction count fields, also parse them + // into the counters tables. + ParseLegacyThreadTimeAndInstructionsAsCounters(); + + // Parse extra counter values before parsing the actual event. This way, we + // can update the slice's thread time / instruction count fields based on + // these counter values and also parse them as slice attributes / arguments. + ParseExtraCounterValues(); + + // TODO(eseckler): Replace phase with type and remove handling of + // legacy_event_.phase() once it is no longer used by producers. + char phase = static_cast(ParsePhaseOrType()); + + switch (phase) { + case 'B': // TRACE_EVENT_PHASE_BEGIN. + return ParseThreadBeginEvent(); + case 'E': // TRACE_EVENT_PHASE_END. + return ParseThreadEndEvent(); + case 'X': // TRACE_EVENT_PHASE_COMPLETE. + return ParseThreadCompleteEvent(); + case 's': // TRACE_EVENT_PHASE_FLOW_BEGIN. + case 't': // TRACE_EVENT_PHASE_FLOW_STEP. + case 'f': // TRACE_EVENT_PHASE_FLOW_END. + return ParseFlowEventV1(phase); + case 'i': + case 'I': // TRACE_EVENT_PHASE_INSTANT. + case 'R': // TRACE_EVENT_PHASE_MARK. 
+ return ParseThreadInstantEvent(phase); + case 'b': // TRACE_EVENT_PHASE_NESTABLE_ASYNC_BEGIN + case 'S': + return ParseAsyncBeginEvent(phase); + case 'e': // TRACE_EVENT_PHASE_NESTABLE_ASYNC_END + case 'F': + return ParseAsyncEndEvent(); + case 'n': // TRACE_EVENT_PHASE_NESTABLE_ASYNC_INSTANT + return ParseAsyncInstantEvent(); + case 'T': + case 'p': + return ParseAsyncStepEvent(phase); + case 'M': // TRACE_EVENT_PHASE_METADATA (process and thread names). + return ParseMetadataEvent(); + default: + // Other events are proxied via the raw table for JSON export. + return ParseLegacyEventAsRawEvent(); + } + } + + private: + StringId ParseTrackEventCategory() { + StringId category_id = kNullStringId; + + std::vector category_iids; + for (auto it = event_.category_iids(); it; ++it) { + category_iids.push_back(*it); + } + std::vector category_strings; + for (auto it = event_.categories(); it; ++it) { + category_strings.push_back(*it); + } + + // If there's a single category, we can avoid building a concatenated + // string. + if (PERFETTO_LIKELY(category_iids.size() == 1 && + category_strings.empty())) { + auto* decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kEventCategoriesFieldNumber, + protos::pbzero::EventCategory>(category_iids[0]); + if (decoder) { + category_id = storage_->InternString(decoder->name()); + } else { + char buffer[32]; + base::StringWriter writer(buffer, sizeof(buffer)); + writer.AppendLiteral("unknown("); + writer.AppendUnsignedInt(category_iids[0]); + writer.AppendChar(')'); + category_id = storage_->InternString(writer.GetStringView()); + } + } else if (category_iids.empty() && category_strings.size() == 1) { + category_id = storage_->InternString(category_strings[0]); + } else if (category_iids.size() + category_strings.size() > 1) { + // We concatenate the category strings together since we currently only + // support a single "cat" column. 
+ // TODO(eseckler): Support multi-category events in the table schema. + std::string categories; + for (uint64_t iid : category_iids) { + auto* decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kEventCategoriesFieldNumber, + protos::pbzero::EventCategory>(iid); + if (!decoder) + continue; + base::StringView name = decoder->name(); + if (!categories.empty()) + categories.append(","); + categories.append(name.data(), name.size()); + } + for (const protozero::ConstChars& cat : category_strings) { + if (!categories.empty()) + categories.append(","); + categories.append(cat.data, cat.size); + } + if (!categories.empty()) + category_id = storage_->InternString(base::StringView(categories)); + } + + return category_id; + } + + StringId ParseTrackEventName() { + uint64_t name_iid = event_.name_iid(); + if (!name_iid) + name_iid = legacy_event_.name_iid(); + + if (PERFETTO_LIKELY(name_iid)) { + auto* decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kEventNamesFieldNumber, + protos::pbzero::EventName>(name_iid); + if (decoder) + return storage_->InternString(decoder->name()); + } else if (event_.has_name()) { + return storage_->InternString(event_.name()); + } + + return kNullStringId; + } + + util::Status ParseTrackAssociation() { + TrackTracker* track_tracker = context_->track_tracker.get(); + ProcessTracker* procs = context_->process_tracker.get(); + + // Consider track_uuid from the packet and TrackEventDefaults, fall back to + // the default descriptor track (uuid 0). + track_uuid_ = event_.has_track_uuid() + ? event_.track_uuid() + : (defaults_ && defaults_->has_track_uuid() + ? defaults_->track_uuid() + : 0u); + + // Determine track from track_uuid specified in either TrackEvent or + // TrackEventDefaults. If a non-default track is not set, we either: + // a) fall back to the track specified by the sequence's (or event's) pid + // + tid (only in case of legacy tracks/events, i.e. 
events that don't + // specify an explicit track uuid or use legacy event phases instead of + // TrackEvent types), or + // b) a default track. + if (track_uuid_) { + std::optional opt_track_id = + track_event_tracker_->GetDescriptorTrack(track_uuid_, name_id_, + packet_sequence_id_); + if (!opt_track_id) { + track_event_tracker_->ReserveDescriptorChildTrack(track_uuid_, + /*parent_uuid=*/0, + name_id_); + opt_track_id = track_event_tracker_->GetDescriptorTrack( + track_uuid_, name_id_, packet_sequence_id_); + } + track_id_ = *opt_track_id; + + auto thread_track_row = + storage_->thread_track_table().id().IndexOf(track_id_); + if (thread_track_row) { + utid_ = storage_->thread_track_table().utid()[*thread_track_row]; + upid_ = storage_->thread_table().upid()[*utid_]; + } else { + auto process_track_row = + storage_->process_track_table().id().IndexOf(track_id_); + if (process_track_row) { + upid_ = storage_->process_track_table().upid()[*process_track_row]; + if (sequence_state_->state()->pid_and_tid_valid()) { + uint32_t pid = + static_cast(sequence_state_->state()->pid()); + uint32_t tid = + static_cast(sequence_state_->state()->tid()); + UniqueTid utid_candidate = procs->UpdateThread(tid, pid); + if (storage_->thread_table().upid()[utid_candidate] == upid_) + legacy_passthrough_utid_ = utid_candidate; + } + } else { + auto* tracks = context_->storage->mutable_track_table(); + auto track_index = tracks->id().IndexOf(track_id_); + if (track_index) { + const StringPool::Id& id = tracks->name()[*track_index]; + if (id.is_null()) + tracks->mutable_name()->Set(*track_index, name_id_); + } + + if (sequence_state_->state()->pid_and_tid_valid()) { + uint32_t pid = + static_cast(sequence_state_->state()->pid()); + uint32_t tid = + static_cast(sequence_state_->state()->tid()); + legacy_passthrough_utid_ = procs->UpdateThread(tid, pid); + } + } + } + } else { + bool pid_tid_state_valid = sequence_state_->state()->pid_and_tid_valid(); + + // We have a 0-value |track_uuid|. 
Nevertheless, we should only fall back + // if we have either no |track_uuid| specified at all or |track_uuid| was + // set explicitly to 0 (e.g. to override a default track_uuid) and we have + // a legacy phase. Events with real phases should use |track_uuid| to + // specify a different track (or use the pid/tid_override fields). + bool fallback_to_legacy_pid_tid_tracks = + (!event_.has_track_uuid() || !event_.has_type()) && + pid_tid_state_valid; + + // Always allow fallback if we have a process override. + fallback_to_legacy_pid_tid_tracks |= legacy_event_.has_pid_override(); + + // A thread override requires a valid pid. + fallback_to_legacy_pid_tid_tracks |= + legacy_event_.has_tid_override() && pid_tid_state_valid; + + if (fallback_to_legacy_pid_tid_tracks) { + uint32_t pid = static_cast(sequence_state_->state()->pid()); + uint32_t tid = static_cast(sequence_state_->state()->tid()); + if (legacy_event_.has_pid_override()) { + pid = static_cast(legacy_event_.pid_override()); + tid = static_cast(-1); + } + if (legacy_event_.has_tid_override()) + tid = static_cast(legacy_event_.tid_override()); + + utid_ = procs->UpdateThread(tid, pid); + upid_ = storage_->thread_table().upid()[*utid_]; + track_id_ = track_tracker->InternThreadTrack(*utid_); + } else { + track_id_ = track_event_tracker_->GetOrCreateDefaultDescriptorTrack(); + } + } + + if (!legacy_event_.has_phase()) + return util::OkStatus(); + + // Legacy phases may imply a different track than the one specified by + // the fallback (or default track uuid) above. + switch (legacy_event_.phase()) { + case 'b': + case 'e': + case 'n': + case 'S': + case 'T': + case 'p': + case 'F': { + // Intern tracks for legacy async events based on legacy event ids. 
+ int64_t source_id = 0; + bool source_id_is_process_scoped = false; + if (legacy_event_.has_unscoped_id()) { + source_id = static_cast(legacy_event_.unscoped_id()); + } else if (legacy_event_.has_global_id()) { + source_id = static_cast(legacy_event_.global_id()); + } else if (legacy_event_.has_local_id()) { + if (!upid_) { + return util::ErrStatus( + "TrackEvent with local_id without process association"); + } + + source_id = static_cast(legacy_event_.local_id()); + source_id_is_process_scoped = true; + } else { + return util::ErrStatus("Async LegacyEvent without ID"); + } + + // Catapult treats nestable async events of different categories with + // the same ID as separate tracks. We replicate the same behavior + // here. For legacy async events, it uses different tracks based on + // event names. + const bool legacy_async = + legacy_event_.phase() == 'S' || legacy_event_.phase() == 'T' || + legacy_event_.phase() == 'p' || legacy_event_.phase() == 'F'; + StringId id_scope = legacy_async ? name_id_ : category_id_; + if (legacy_event_.has_id_scope()) { + std::string concat = storage_->GetString(category_id_).ToStdString() + + ":" + legacy_event_.id_scope().ToStdString(); + id_scope = storage_->InternString(base::StringView(concat)); + } + + track_id_ = context_->track_tracker->InternLegacyChromeAsyncTrack( + name_id_, upid_.value_or(0), source_id, source_id_is_process_scoped, + id_scope); + legacy_passthrough_utid_ = utid_; + break; + } + case 'i': + case 'I': { + // Intern tracks for global or process-scoped legacy instant events. + switch (legacy_event_.instant_event_scope()) { + case LegacyEvent::SCOPE_UNSPECIFIED: + case LegacyEvent::SCOPE_THREAD: + // Thread-scoped legacy instant events already have the right + // track based on the tid/pid of the sequence. 
+ if (!utid_) { + return util::ErrStatus( + "Thread-scoped instant event without thread association"); + } + break; + case LegacyEvent::SCOPE_GLOBAL: + track_id_ = context_->track_tracker + ->GetOrCreateLegacyChromeGlobalInstantTrack(); + legacy_passthrough_utid_ = utid_; + utid_ = std::nullopt; + break; + case LegacyEvent::SCOPE_PROCESS: + if (!upid_) { + return util::ErrStatus( + "Process-scoped instant event without process association"); + } + + track_id_ = + context_->track_tracker->InternLegacyChromeProcessInstantTrack( + *upid_); + legacy_passthrough_utid_ = utid_; + utid_ = std::nullopt; + break; + } + break; + } + default: + break; + } + + return util::OkStatus(); + } + + int32_t ParsePhaseOrType() { + if (legacy_event_.has_phase()) + return legacy_event_.phase(); + + switch (event_.type()) { + case TrackEvent::TYPE_SLICE_BEGIN: + return utid_ ? 'B' : 'b'; + case TrackEvent::TYPE_SLICE_END: + return utid_ ? 'E' : 'e'; + case TrackEvent::TYPE_INSTANT: + return utid_ ? 'i' : 'n'; + default: + PERFETTO_ELOG("unexpected event type %d", event_.type()); + return 0; + } + } + + void ParseCounterEvent() { + // Tokenizer ensures that TYPE_COUNTER events are associated with counter + // tracks and have values. + PERFETTO_DCHECK(storage_->counter_track_table().id().IndexOf(track_id_)); + PERFETTO_DCHECK(event_.has_counter_value() || + event_.has_double_counter_value()); + + context_->event_tracker->PushCounter( + ts_, static_cast(event_data_->counter_value), track_id_); + } + + void ParseLegacyThreadTimeAndInstructionsAsCounters() { + if (!utid_) + return; + // When these fields are set, we don't expect TrackDescriptor-based counters + // for thread time or instruction count for this thread in the trace, so we + // intern separate counter tracks based on name + utid. Note that we cannot + // import the counter values from the end of a complete event, because the + // EventTracker expects counters to be pushed in order of their timestamps. 
+ // One more reason to switch to split begin/end events. + if (thread_timestamp_) { + TrackId track_id = context_->track_tracker->InternThreadCounterTrack( + parser_->counter_name_thread_time_id_, *utid_); + context_->event_tracker->PushCounter( + ts_, static_cast(*thread_timestamp_), track_id); + } + if (thread_instruction_count_) { + TrackId track_id = context_->track_tracker->InternThreadCounterTrack( + parser_->counter_name_thread_instruction_count_id_, *utid_); + context_->event_tracker->PushCounter( + ts_, static_cast(*thread_instruction_count_), track_id); + } + } + + void ParseExtraCounterValues() { + if (!event_.has_extra_counter_values() && + !event_.has_extra_double_counter_values()) { + return; + } + + // Add integer extra counter values. + size_t index = 0; + protozero::RepeatedFieldIterator track_uuid_it; + if (event_.has_extra_counter_track_uuids()) { + track_uuid_it = event_.extra_counter_track_uuids(); + } else if (defaults_ && defaults_->has_extra_counter_track_uuids()) { + track_uuid_it = defaults_->extra_counter_track_uuids(); + } + for (auto value_it = event_.extra_counter_values(); value_it; + ++value_it, ++track_uuid_it, ++index) { + AddExtraCounterValue(track_uuid_it, index); + } + + // Add double extra counter values. 
+ track_uuid_it = protozero::RepeatedFieldIterator(); + if (event_.has_extra_double_counter_track_uuids()) { + track_uuid_it = event_.extra_double_counter_track_uuids(); + } else if (defaults_ && defaults_->has_extra_double_counter_track_uuids()) { + track_uuid_it = defaults_->extra_double_counter_track_uuids(); + } + for (auto value_it = event_.extra_double_counter_values(); value_it; + ++value_it, ++track_uuid_it, ++index) { + AddExtraCounterValue(track_uuid_it, index); + } + } + + void AddExtraCounterValue( + protozero::RepeatedFieldIterator track_uuid_it, + size_t index) { + // Tokenizer ensures that there aren't more values than uuids, that we + // don't have more values than kMaxNumExtraCounters and that the + // track_uuids are for valid counter tracks. + PERFETTO_DCHECK(track_uuid_it); + PERFETTO_DCHECK(index < TrackEventData::kMaxNumExtraCounters); + + std::optional track_id = track_event_tracker_->GetDescriptorTrack( + *track_uuid_it, kNullStringId, packet_sequence_id_); + std::optional counter_row = + storage_->counter_track_table().id().IndexOf(*track_id); + + double value = event_data_->extra_counter_values[index]; + context_->event_tracker->PushCounter(ts_, value, *track_id); + + // Also import thread_time and thread_instruction_count counters into + // slice columns to simplify JSON export. 
+ StringId counter_name = + storage_->counter_track_table().name()[*counter_row]; + if (counter_name == parser_->counter_name_thread_time_id_) { + thread_timestamp_ = static_cast(value); + } else if (counter_name == + parser_->counter_name_thread_instruction_count_id_) { + thread_instruction_count_ = static_cast(value); + } + } + + util::Status ParseThreadBeginEvent() { + if (!utid_) { + return util::ErrStatus( + "TrackEvent with phase B without thread association"); + } + + auto* thread_slices = storage_->mutable_slice_table(); + auto opt_slice_id = context_->slice_tracker->BeginTyped( + thread_slices, MakeThreadSliceRow(), + [this](BoundInserter* inserter) { ParseTrackEventArgs(inserter); }); + + if (opt_slice_id.has_value()) { + MaybeParseFlowEvents(opt_slice_id.value()); + } + return util::OkStatus(); + } + + util::Status ParseThreadEndEvent() { + if (!utid_) { + return util::ErrStatus( + "TrackEvent with phase E without thread association"); + } + auto opt_slice_id = context_->slice_tracker->End( + ts_, track_id_, category_id_, name_id_, + [this](BoundInserter* inserter) { ParseTrackEventArgs(inserter); }); + if (!opt_slice_id) + return base::OkStatus(); + + MaybeParseFlowEvents(*opt_slice_id); + auto* thread_slices = storage_->mutable_slice_table(); + auto opt_thread_slice_ref = thread_slices->FindById(*opt_slice_id); + if (!opt_thread_slice_ref) { + // This means that the end event did not match a corresponding track event + // begin packet so we likely closed the wrong slice. There's not much we + // can do about this beyond flag it as a stat. 
+ context_->storage->IncrementStats(stats::track_event_thread_invalid_end); + return base::OkStatus(); + } + + tables::SliceTable::RowReference slice_ref = *opt_thread_slice_ref; + std::optional tts = slice_ref.thread_ts(); + if (tts) { + PERFETTO_DCHECK(thread_timestamp_); + slice_ref.set_thread_dur(*thread_timestamp_ - *tts); + } + std::optional tic = slice_ref.thread_instruction_count(); + if (tic) { + PERFETTO_DCHECK(event_data_->thread_instruction_count); + slice_ref.set_thread_instruction_delta( + *event_data_->thread_instruction_count - *tic); + } + return util::OkStatus(); + } + + util::Status ParseThreadCompleteEvent() { + if (!utid_) { + return util::ErrStatus( + "TrackEvent with phase X without thread association"); + } + + auto duration_ns = legacy_event_.duration_us() * 1000; + if (duration_ns < 0) + return util::ErrStatus("TrackEvent with phase X with negative duration"); + + auto* thread_slices = storage_->mutable_slice_table(); + tables::SliceTable::Row row = MakeThreadSliceRow(); + row.dur = duration_ns; + if (legacy_event_.has_thread_duration_us()) { + row.thread_dur = legacy_event_.thread_duration_us() * 1000; + } + if (legacy_event_.has_thread_instruction_delta()) { + row.thread_instruction_delta = legacy_event_.thread_instruction_delta(); + } + auto opt_slice_id = context_->slice_tracker->ScopedTyped( + thread_slices, std::move(row), + [this](BoundInserter* inserter) { ParseTrackEventArgs(inserter); }); + + if (opt_slice_id.has_value()) { + MaybeParseFlowEvents(opt_slice_id.value()); + } + return util::OkStatus(); + } + + std::optional GetLegacyEventId() { + if (legacy_event_.has_unscoped_id()) + return legacy_event_.unscoped_id(); + // TODO(andrewbb): Catapult doesn't support global_id and local_id on flow + // events. We could add support in trace processor (e.g. 
because there seem + // to be some callsites supplying local_id in chromium), but we would have + // to consider the process ID for local IDs and use a separate ID scope for + // global_id and unscoped_id. + return std::nullopt; + } + + util::Status ParseFlowEventV1(char phase) { + auto opt_source_id = GetLegacyEventId(); + if (!opt_source_id) { + storage_->IncrementStats(stats::flow_invalid_id); + return util::ErrStatus("Invalid id for flow event v1"); + } + FlowId flow_id = context_->flow_tracker->GetFlowIdForV1Event( + opt_source_id.value(), category_id_, name_id_); + switch (phase) { + case 's': + context_->flow_tracker->Begin(track_id_, flow_id); + break; + case 't': + context_->flow_tracker->Step(track_id_, flow_id); + break; + case 'f': + context_->flow_tracker->End(track_id_, flow_id, + legacy_event_.bind_to_enclosing(), + /* close_flow = */ false); + break; + } + return util::OkStatus(); + } + + void MaybeParseTrackEventFlows(SliceId slice_id) { + if (event_.has_flow_ids_old() || event_.has_flow_ids()) { + auto it = + event_.has_flow_ids() ? event_.flow_ids() : event_.flow_ids_old(); + for (; it; ++it) { + FlowId flow_id = *it; + if (!context_->flow_tracker->IsActive(flow_id)) { + context_->flow_tracker->Begin(slice_id, flow_id); + continue; + } + context_->flow_tracker->Step(slice_id, flow_id); + } + } + if (event_.has_terminating_flow_ids_old() || + event_.has_terminating_flow_ids()) { + auto it = event_.has_terminating_flow_ids() + ? event_.terminating_flow_ids() + : event_.terminating_flow_ids_old(); + for (; it; ++it) { + FlowId flow_id = *it; + if (!context_->flow_tracker->IsActive(flow_id)) { + // If we should terminate a flow, do not begin a new one if it's not + // active already. 
+ continue; + } + context_->flow_tracker->End(slice_id, flow_id, + /* close_flow = */ true); + } + } + } + + void MaybeParseFlowEventV2(SliceId slice_id) { + if (!legacy_event_.has_bind_id()) { + return; + } + if (!legacy_event_.has_flow_direction()) { + storage_->IncrementStats(stats::flow_without_direction); + return; + } + + auto bind_id = legacy_event_.bind_id(); + switch (legacy_event_.flow_direction()) { + case LegacyEvent::FLOW_OUT: + context_->flow_tracker->Begin(slice_id, bind_id); + break; + case LegacyEvent::FLOW_INOUT: + context_->flow_tracker->Step(slice_id, bind_id); + break; + case LegacyEvent::FLOW_IN: + context_->flow_tracker->End(slice_id, bind_id, + /* close_flow = */ false); + break; + default: + storage_->IncrementStats(stats::flow_without_direction); + } + } + + void MaybeParseFlowEvents(SliceId slice_id) { + MaybeParseFlowEventV2(slice_id); + MaybeParseTrackEventFlows(slice_id); + } + + util::Status ParseThreadInstantEvent(char phase) { + // Handle instant events as slices with zero duration, so that they end + // up nested underneath their parent slices. + int64_t duration_ns = 0; + int64_t tidelta = 0; + std::optional opt_slice_id; + auto args_inserter = [this, phase](BoundInserter* inserter) { + ParseTrackEventArgs(inserter); + // For legacy MARK event, add phase for JSON exporter. 
+ if (phase == 'R') { + std::string phase_string(1, static_cast(phase)); + StringId phase_id = storage_->InternString(phase_string.c_str()); + inserter->AddArg(parser_->legacy_event_phase_key_id_, + Variadic::String(phase_id)); + } + }; + if (utid_) { + auto* thread_slices = storage_->mutable_slice_table(); + tables::SliceTable::Row row = MakeThreadSliceRow(); + row.dur = duration_ns; + if (thread_timestamp_) { + row.thread_dur = duration_ns; + } + if (thread_instruction_count_) { + row.thread_instruction_delta = tidelta; + } + opt_slice_id = context_->slice_tracker->ScopedTyped( + thread_slices, row, std::move(args_inserter)); + } else { + opt_slice_id = context_->slice_tracker->Scoped( + ts_, track_id_, category_id_, name_id_, duration_ns, + std::move(args_inserter)); + } + if (!opt_slice_id.has_value()) { + return util::OkStatus(); + } + MaybeParseFlowEvents(opt_slice_id.value()); + return util::OkStatus(); + } + + util::Status ParseAsyncBeginEvent(char phase) { + auto args_inserter = [this, phase](BoundInserter* inserter) { + ParseTrackEventArgs(inserter); + + if (phase == 'b') + return; + PERFETTO_DCHECK(phase == 'S'); + // For legacy ASYNC_BEGIN, add phase for JSON exporter. + std::string phase_string(1, static_cast(phase)); + StringId phase_id = storage_->InternString(phase_string.c_str()); + inserter->AddArg(parser_->legacy_event_phase_key_id_, + Variadic::String(phase_id)); + }; + auto opt_slice_id = context_->slice_tracker->Begin( + ts_, track_id_, category_id_, name_id_, args_inserter); + if (!opt_slice_id.has_value()) { + return util::OkStatus(); + } + MaybeParseFlowEvents(opt_slice_id.value()); + // For the time being, we only create vtrack slice rows if we need to + // store thread timestamps/counters. 
+ if (legacy_event_.use_async_tts()) { + auto* vtrack_slices = storage_->mutable_virtual_track_slices(); + PERFETTO_DCHECK(!vtrack_slices->slice_count() || + vtrack_slices->slice_ids().back() < opt_slice_id.value()); + int64_t tts = thread_timestamp_.value_or(0); + int64_t tic = thread_instruction_count_.value_or(0); + vtrack_slices->AddVirtualTrackSlice(opt_slice_id.value(), tts, + kPendingThreadDuration, tic, + kPendingThreadInstructionDelta); + } + return util::OkStatus(); + } + + util::Status ParseAsyncEndEvent() { + auto opt_slice_id = context_->slice_tracker->End( + ts_, track_id_, category_id_, name_id_, + [this](BoundInserter* inserter) { ParseTrackEventArgs(inserter); }); + if (!opt_slice_id) + return base::OkStatus(); + + MaybeParseFlowEvents(*opt_slice_id); + if (legacy_event_.use_async_tts()) { + auto* vtrack_slices = storage_->mutable_virtual_track_slices(); + int64_t tts = event_data_->thread_timestamp.value_or(0); + int64_t tic = event_data_->thread_instruction_count.value_or(0); + vtrack_slices->UpdateThreadDeltasForSliceId(*opt_slice_id, tts, tic); + } + return util::OkStatus(); + } + + util::Status ParseAsyncStepEvent(char phase) { + // Parse step events as instant events. Reconstructing the begin/end times + // of the child slice would be too complicated, see b/178540838. For JSON + // export, we still record the original step's phase in an arg. + int64_t duration_ns = 0; + context_->slice_tracker->Scoped( + ts_, track_id_, category_id_, name_id_, duration_ns, + [this, phase](BoundInserter* inserter) { + ParseTrackEventArgs(inserter); + + PERFETTO_DCHECK(phase == 'T' || phase == 'p'); + std::string phase_string(1, static_cast(phase)); + StringId phase_id = storage_->InternString(phase_string.c_str()); + inserter->AddArg(parser_->legacy_event_phase_key_id_, + Variadic::String(phase_id)); + }); + // Step events don't support thread timestamps, so no need to add a row to + // virtual_track_slices. 
+ return util::OkStatus(); + } + + util::Status ParseAsyncInstantEvent() { + // Handle instant events as slices with zero duration, so that they end + // up nested underneath their parent slices. + int64_t duration_ns = 0; + int64_t tidelta = 0; + auto opt_slice_id = context_->slice_tracker->Scoped( + ts_, track_id_, category_id_, name_id_, duration_ns, + [this](BoundInserter* inserter) { ParseTrackEventArgs(inserter); }); + if (!opt_slice_id.has_value()) { + return util::OkStatus(); + } + MaybeParseFlowEvents(opt_slice_id.value()); + if (legacy_event_.use_async_tts()) { + auto* vtrack_slices = storage_->mutable_virtual_track_slices(); + PERFETTO_DCHECK(!vtrack_slices->slice_count() || + vtrack_slices->slice_ids().back() < opt_slice_id.value()); + int64_t tts = thread_timestamp_.value_or(0); + int64_t tic = thread_instruction_count_.value_or(0); + vtrack_slices->AddVirtualTrackSlice(opt_slice_id.value(), tts, + duration_ns, tic, tidelta); + } + return util::OkStatus(); + } + + util::Status ParseMetadataEvent() { + ProcessTracker* procs = context_->process_tracker.get(); + + if (name_id_ == kNullStringId) + return util::ErrStatus("Metadata event without name"); + + // Parse process and thread names from correspondingly named events. 
+ NullTermStringView event_name = storage_->GetString(name_id_); + PERFETTO_DCHECK(event_name.data()); + if (strcmp(event_name.c_str(), "thread_name") == 0) { + if (!utid_) { + return util::ErrStatus( + "thread_name metadata event without thread association"); + } + + auto it = event_.debug_annotations(); + if (!it) { + return util::ErrStatus( + "thread_name metadata event without debug annotations"); + } + protos::pbzero::DebugAnnotation::Decoder annotation(*it); + auto thread_name = annotation.string_value(); + if (!thread_name.size) + return util::OkStatus(); + auto thread_name_id = storage_->InternString(thread_name); + procs->UpdateThreadNameByUtid( + *utid_, thread_name_id, + ThreadNamePriority::kTrackDescriptorThreadType); + return util::OkStatus(); + } + if (strcmp(event_name.c_str(), "process_name") == 0) { + if (!upid_) { + return util::ErrStatus( + "process_name metadata event without process association"); + } + + auto it = event_.debug_annotations(); + if (!it) { + return util::ErrStatus( + "process_name metadata event without debug annotations"); + } + protos::pbzero::DebugAnnotation::Decoder annotation(*it); + auto process_name = annotation.string_value(); + if (!process_name.size) + return util::OkStatus(); + auto process_name_id = + storage_->InternString(base::StringView(process_name)); + // Don't override system-provided names. + procs->SetProcessNameIfUnset(*upid_, process_name_id); + return util::OkStatus(); + } + // Other metadata events are proxied via the raw table for JSON export. 
+ ParseLegacyEventAsRawEvent(); + return util::OkStatus(); + } + + util::Status ParseLegacyEventAsRawEvent() { + if (!utid_) + return util::ErrStatus("raw legacy event without thread association"); + + RawId id = storage_->mutable_raw_table() + ->Insert({ts_, parser_->raw_legacy_event_id_, 0, *utid_}) + .id; + + auto inserter = context_->args_tracker->AddArgsTo(id); + inserter + .AddArg(parser_->legacy_event_category_key_id_, + Variadic::String(category_id_)) + .AddArg(parser_->legacy_event_name_key_id_, Variadic::String(name_id_)); + + std::string phase_string(1, static_cast(legacy_event_.phase())); + StringId phase_id = storage_->InternString(phase_string.c_str()); + inserter.AddArg(parser_->legacy_event_phase_key_id_, + Variadic::String(phase_id)); + + if (legacy_event_.has_duration_us()) { + inserter.AddArg(parser_->legacy_event_duration_ns_key_id_, + Variadic::Integer(legacy_event_.duration_us() * 1000)); + } + + if (thread_timestamp_) { + inserter.AddArg(parser_->legacy_event_thread_timestamp_ns_key_id_, + Variadic::Integer(*thread_timestamp_)); + if (legacy_event_.has_thread_duration_us()) { + inserter.AddArg( + parser_->legacy_event_thread_duration_ns_key_id_, + Variadic::Integer(legacy_event_.thread_duration_us() * 1000)); + } + } + + if (thread_instruction_count_) { + inserter.AddArg(parser_->legacy_event_thread_instruction_count_key_id_, + Variadic::Integer(*thread_instruction_count_)); + if (legacy_event_.has_thread_instruction_delta()) { + inserter.AddArg( + parser_->legacy_event_thread_instruction_delta_key_id_, + Variadic::Integer(legacy_event_.thread_instruction_delta())); + } + } + + if (legacy_event_.use_async_tts()) { + inserter.AddArg(parser_->legacy_event_use_async_tts_key_id_, + Variadic::Boolean(true)); + } + + bool has_id = false; + if (legacy_event_.has_unscoped_id()) { + // Unscoped ids are either global or local depending on the phase. Pass + // them through as unscoped IDs to JSON export to preserve this behavior. 
+ inserter.AddArg(parser_->legacy_event_unscoped_id_key_id_, + Variadic::UnsignedInteger(legacy_event_.unscoped_id())); + has_id = true; + } else if (legacy_event_.has_global_id()) { + inserter.AddArg(parser_->legacy_event_global_id_key_id_, + Variadic::UnsignedInteger(legacy_event_.global_id())); + has_id = true; + } else if (legacy_event_.has_local_id()) { + inserter.AddArg(parser_->legacy_event_local_id_key_id_, + Variadic::UnsignedInteger(legacy_event_.local_id())); + has_id = true; + } + + if (has_id && legacy_event_.has_id_scope() && + legacy_event_.id_scope().size) { + inserter.AddArg( + parser_->legacy_event_id_scope_key_id_, + Variadic::String(storage_->InternString(legacy_event_.id_scope()))); + } + + // No need to parse legacy_event.instant_event_scope() because we import + // instant events into the slice table. + + ParseTrackEventArgs(&inserter); + return util::OkStatus(); + } + + void ParseTrackEventArgs(BoundInserter* inserter) { + auto log_errors = [this](util::Status status) { + if (status.ok()) + return; + // Log error but continue parsing the other args. 
+ storage_->IncrementStats(stats::track_event_parser_errors); + PERFETTO_DLOG("ParseTrackEventArgs error: %s", status.c_message()); + }; + + if (event_.has_source_location_iid()) { + log_errors(AddSourceLocationArgs(event_.source_location_iid(), inserter)); + } + + if (event_.has_task_execution()) { + log_errors(ParseTaskExecutionArgs(event_.task_execution(), inserter)); + } + if (event_.has_log_message()) { + log_errors(ParseLogMessage(event_.log_message(), inserter)); + } + if (event_.has_chrome_histogram_sample()) { + log_errors( + ParseHistogramName(event_.chrome_histogram_sample(), inserter)); + } + if (event_.has_chrome_active_processes()) { + protos::pbzero::ChromeActiveProcesses::Decoder message( + event_.chrome_active_processes()); + for (auto it = message.pid(); it; ++it) { + parser_->AddActiveProcess(ts_, *it); + } + } + + TrackEventArgsParser args_writer(ts_, *inserter, *storage_, + *sequence_state_); + int unknown_extensions = 0; + log_errors(parser_->args_parser_.ParseMessage( + blob_, ".perfetto.protos.TrackEvent", &parser_->reflect_fields_, + args_writer, &unknown_extensions)); + if (unknown_extensions > 0) { + context_->storage->IncrementStats(stats::unknown_extension_fields, + unknown_extensions); + } + + { + auto key = parser_->args_parser_.EnterDictionary("debug"); + util::DebugAnnotationParser parser(parser_->args_parser_); + for (auto it = event_.debug_annotations(); it; ++it) { + log_errors(parser.Parse(*it, args_writer)); + } + } + + if (legacy_passthrough_utid_) { + inserter->AddArg(parser_->legacy_event_passthrough_utid_id_, + Variadic::UnsignedInteger(*legacy_passthrough_utid_), + ArgsTracker::UpdatePolicy::kSkipIfExists); + } + } + + util::Status ParseTaskExecutionArgs(ConstBytes task_execution, + BoundInserter* inserter) { + protos::pbzero::TaskExecution::Decoder task(task_execution); + uint64_t iid = task.posted_from_iid(); + if (!iid) + return util::ErrStatus("TaskExecution with invalid posted_from_iid"); + + auto* decoder = 
sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kSourceLocationsFieldNumber, + protos::pbzero::SourceLocation>(iid); + if (!decoder) + return util::ErrStatus("TaskExecution with invalid posted_from_iid"); + + StringId file_name_id = kNullStringId; + StringId function_name_id = kNullStringId; + uint32_t line_number = 0; + + std::string file_name = NormalizePathSeparators(decoder->file_name()); + file_name_id = storage_->InternString(base::StringView(file_name)); + function_name_id = storage_->InternString(decoder->function_name()); + line_number = decoder->line_number(); + + inserter->AddArg(parser_->task_file_name_args_key_id_, + Variadic::String(file_name_id)); + inserter->AddArg(parser_->task_function_name_args_key_id_, + Variadic::String(function_name_id)); + inserter->AddArg(parser_->task_line_number_args_key_id_, + Variadic::UnsignedInteger(line_number)); + return util::OkStatus(); + } + + util::Status AddSourceLocationArgs(uint64_t iid, BoundInserter* inserter) { + if (!iid) + return util::ErrStatus("SourceLocation with invalid iid"); + + auto* decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kSourceLocationsFieldNumber, + protos::pbzero::SourceLocation>(iid); + if (!decoder) + return util::ErrStatus("SourceLocation with invalid iid"); + + StringId file_name_id = kNullStringId; + StringId function_name_id = kNullStringId; + uint32_t line_number = 0; + + std::string file_name = NormalizePathSeparators(decoder->file_name()); + file_name_id = storage_->InternString(base::StringView(file_name)); + function_name_id = storage_->InternString(decoder->function_name()); + line_number = decoder->line_number(); + + inserter->AddArg(parser_->source_location_file_name_key_id_, + Variadic::String(file_name_id)); + inserter->AddArg(parser_->source_location_function_name_key_id_, + Variadic::String(function_name_id)); + inserter->AddArg(parser_->source_location_line_number_key_id_, + 
Variadic::UnsignedInteger(line_number)); + return util::OkStatus(); + } + + util::Status ParseLogMessage(ConstBytes blob, BoundInserter* inserter) { + if (!utid_) + return util::ErrStatus("LogMessage without thread association"); + + protos::pbzero::LogMessage::Decoder message(blob); + + auto* body_decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kLogMessageBodyFieldNumber, + protos::pbzero::LogMessageBody>(message.body_iid()); + if (!body_decoder) + return util::ErrStatus("LogMessage with invalid body_iid"); + + const StringId log_message_id = + storage_->InternString(body_decoder->body()); + inserter->AddArg(parser_->log_message_body_key_id_, + Variadic::String(log_message_id)); + + StringId source_location_id = kNullStringId; + if (message.has_source_location_iid()) { + auto* source_location_decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kSourceLocationsFieldNumber, + protos::pbzero::SourceLocation>(message.source_location_iid()); + if (!source_location_decoder) + return util::ErrStatus("LogMessage with invalid source_location_iid"); + const std::string source_location = + source_location_decoder->file_name().ToStdString() + ":" + + std::to_string(source_location_decoder->line_number()); + source_location_id = + storage_->InternString(base::StringView(source_location)); + + inserter->AddArg(parser_->log_message_source_location_file_name_key_id_, + Variadic::String(storage_->InternString( + source_location_decoder->file_name()))); + inserter->AddArg( + parser_->log_message_source_location_function_name_key_id_, + Variadic::String(storage_->InternString( + source_location_decoder->function_name()))); + inserter->AddArg( + parser_->log_message_source_location_line_number_key_id_, + Variadic::Integer(source_location_decoder->line_number())); + } + + // The track event log message doesn't specify any priority. UI never + // displays priorities < 2 (VERBOSE in android). 
Let's make all the track + // event logs show up as INFO. + int32_t priority = protos::pbzero::AndroidLogPriority::PRIO_INFO; + if (message.has_prio()) { + priority = ToAndroidLogPriority( + static_cast(message.prio())); + inserter->AddArg(parser_->log_message_priority_id_, + Variadic::Integer(priority)); + } + + storage_->mutable_android_log_table()->Insert( + {ts_, *utid_, + /*priority*/ static_cast(priority), + /*tag_id*/ source_location_id, log_message_id}); + + return util::OkStatus(); + } + + util::Status ParseHistogramName(ConstBytes blob, BoundInserter* inserter) { + protos::pbzero::ChromeHistogramSample::Decoder sample(blob); + if (!sample.has_name_iid()) + return util::OkStatus(); + + if (sample.has_name()) { + return util::ErrStatus( + "name is already set for ChromeHistogramSample: only one of name and " + "name_iid can be set."); + } + + auto* decoder = sequence_state_->LookupInternedMessage< + protos::pbzero::InternedData::kHistogramNamesFieldNumber, + protos::pbzero::HistogramName>(sample.name_iid()); + if (!decoder) + return util::ErrStatus("HistogramName with invalid name_iid"); + + inserter->AddArg(parser_->histogram_name_key_id_, + Variadic::String(storage_->InternString(decoder->name()))); + return util::OkStatus(); + } + + tables::SliceTable::Row MakeThreadSliceRow() { + tables::SliceTable::Row row; + row.ts = ts_; + row.track_id = track_id_; + row.category = category_id_; + row.name = name_id_; + row.thread_ts = thread_timestamp_; + row.thread_dur = std::nullopt; + row.thread_instruction_count = thread_instruction_count_; + row.thread_instruction_delta = std::nullopt; + return row; + } + + TraceProcessorContext* context_; + TrackEventTracker* track_event_tracker_; + TraceStorage* storage_; + TrackEventParser* parser_; + ArgsTranslationTable* args_translation_table_; + int64_t ts_; + const TrackEventData* event_data_; + PacketSequenceStateGeneration* sequence_state_; + ConstBytes blob_; + TrackEvent::Decoder event_; + LegacyEvent::Decoder 
legacy_event_; + protos::pbzero::TrackEventDefaults::Decoder* defaults_; + + // Importing state. + StringId category_id_; + StringId name_id_; + uint64_t track_uuid_; + TrackId track_id_; + std::optional utid_; + std::optional upid_; + std::optional thread_timestamp_; + std::optional thread_instruction_count_; + // All events in legacy JSON require a thread ID, but for some types of + // events (e.g. async events or process/global-scoped instants), we don't + // store it in the slice/track model. To pass the utid through to the json + // export, we store it in an arg. + std::optional legacy_passthrough_utid_; + + uint32_t packet_sequence_id_; +}; + +TrackEventParser::TrackEventParser(TraceProcessorContext* context, + TrackEventTracker* track_event_tracker) + : args_parser_(*context->descriptor_pool_.get()), + context_(context), + track_event_tracker_(track_event_tracker), + counter_name_thread_time_id_( + context->storage->InternString("thread_time")), + counter_name_thread_instruction_count_id_( + context->storage->InternString("thread_instruction_count")), + task_file_name_args_key_id_( + context->storage->InternString("task.posted_from.file_name")), + task_function_name_args_key_id_( + context->storage->InternString("task.posted_from.function_name")), + task_line_number_args_key_id_( + context->storage->InternString("task.posted_from.line_number")), + log_message_body_key_id_( + context->storage->InternString("track_event.log_message")), + log_message_source_location_function_name_key_id_( + context->storage->InternString( + "track_event.log_message.function_name")), + log_message_source_location_file_name_key_id_( + context->storage->InternString("track_event.log_message.file_name")), + log_message_source_location_line_number_key_id_( + context->storage->InternString( + "track_event.log_message.line_number")), + log_message_priority_id_( + context->storage->InternString("track_event.priority")), + source_location_function_name_key_id_( + 
context->storage->InternString("source.function_name")), + source_location_file_name_key_id_( + context->storage->InternString("source.file_name")), + source_location_line_number_key_id_( + context->storage->InternString("source.line_number")), + raw_legacy_event_id_( + context->storage->InternString("track_event.legacy_event")), + legacy_event_passthrough_utid_id_( + context->storage->InternString("legacy_event.passthrough_utid")), + legacy_event_category_key_id_( + context->storage->InternString("legacy_event.category")), + legacy_event_name_key_id_( + context->storage->InternString("legacy_event.name")), + legacy_event_phase_key_id_( + context->storage->InternString("legacy_event.phase")), + legacy_event_duration_ns_key_id_( + context->storage->InternString("legacy_event.duration_ns")), + legacy_event_thread_timestamp_ns_key_id_( + context->storage->InternString("legacy_event.thread_timestamp_ns")), + legacy_event_thread_duration_ns_key_id_( + context->storage->InternString("legacy_event.thread_duration_ns")), + legacy_event_thread_instruction_count_key_id_( + context->storage->InternString( + "legacy_event.thread_instruction_count")), + legacy_event_thread_instruction_delta_key_id_( + context->storage->InternString( + "legacy_event.thread_instruction_delta")), + legacy_event_use_async_tts_key_id_( + context->storage->InternString("legacy_event.use_async_tts")), + legacy_event_unscoped_id_key_id_( + context->storage->InternString("legacy_event.unscoped_id")), + legacy_event_global_id_key_id_( + context->storage->InternString("legacy_event.global_id")), + legacy_event_local_id_key_id_( + context->storage->InternString("legacy_event.local_id")), + legacy_event_id_scope_key_id_( + context->storage->InternString("legacy_event.id_scope")), + legacy_event_bind_id_key_id_( + context->storage->InternString("legacy_event.bind_id")), + legacy_event_bind_to_enclosing_key_id_( + context->storage->InternString("legacy_event.bind_to_enclosing")), + 
legacy_event_flow_direction_key_id_( + context->storage->InternString("legacy_event.flow_direction")), + histogram_name_key_id_( + context->storage->InternString("chrome_histogram_sample.name")), + flow_direction_value_in_id_(context->storage->InternString("in")), + flow_direction_value_out_id_(context->storage->InternString("out")), + flow_direction_value_inout_id_(context->storage->InternString("inout")), + chrome_legacy_ipc_class_args_key_id_( + context->storage->InternString("legacy_ipc.class")), + chrome_legacy_ipc_line_args_key_id_( + context->storage->InternString("legacy_ipc.line")), + chrome_host_app_package_name_id_( + context->storage->InternString("chrome.host_app_package_name")), + chrome_crash_trace_id_name_id_( + context->storage->InternString("chrome.crash_trace_id")), + chrome_process_label_flat_key_id_( + context->storage->InternString("chrome.process_label")), + chrome_process_type_id_( + context_->storage->InternString("chrome.process_type")), + event_category_key_id_(context_->storage->InternString("event.category")), + event_name_key_id_(context_->storage->InternString("event.name")), + chrome_string_lookup_(context->storage.get()), + counter_unit_ids_{{kNullStringId, context_->storage->InternString("ns"), + context_->storage->InternString("count"), + context_->storage->InternString("bytes")}}, + active_chrome_processes_tracker_(context) { + args_parser_.AddParsingOverrideForField( + "chrome_mojo_event_info.mojo_interface_method_iid", + [](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + return MaybeParseUnsymbolizedSourceLocation( + "chrome_mojo_event_info.mojo_interface_method.native_symbol", field, + delegate); + }); + // Switch |source_location_iid| into its interned data variant. 
+ args_parser_.AddParsingOverrideForField( + "begin_impl_frame_args.current_args.source_location_iid", + [](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + return MaybeParseSourceLocation("begin_impl_frame_args.current_args", + field, delegate); + }); + args_parser_.AddParsingOverrideForField( + "begin_impl_frame_args.last_args.source_location_iid", + [](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + return MaybeParseSourceLocation("begin_impl_frame_args.last_args", + field, delegate); + }); + args_parser_.AddParsingOverrideForField( + "begin_frame_observer_state.last_begin_frame_args.source_location_iid", + [](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + return MaybeParseSourceLocation( + "begin_frame_observer_state.last_begin_frame_args", field, + delegate); + }); + args_parser_.AddParsingOverrideForField( + "chrome_memory_pressure_notification.creation_location_iid", + [](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + return MaybeParseSourceLocation("chrome_memory_pressure_notification", + field, delegate); + }); + + // Parse DebugAnnotations. + args_parser_.AddParsingOverrideForType( + ".perfetto.protos.DebugAnnotation", + [&](util::ProtoToArgsParser::ScopedNestedKeyContext& key, + const protozero::ConstBytes& data, + util::ProtoToArgsParser::Delegate& delegate) { + // Do not add "debug_annotations" to the final key. + key.RemoveFieldSuffix(); + util::DebugAnnotationParser annotation_parser(args_parser_); + return annotation_parser.Parse(data, delegate); + }); + + args_parser_.AddParsingOverrideForField( + "active_processes.pid", [&](const protozero::Field& field, + util::ProtoToArgsParser::Delegate& delegate) { + AddActiveProcess(delegate.packet_timestamp(), field.as_int32()); + // Fallthrough so that the parser adds pid as a regular arg. 
+ return std::nullopt; + }); + + for (uint16_t index : kReflectFields) { + reflect_fields_.push_back(index); + } +} + +void TrackEventParser::ParseTrackDescriptor( + int64_t packet_timestamp, + protozero::ConstBytes track_descriptor, + uint32_t packet_sequence_id) { + protos::pbzero::TrackDescriptor::Decoder decoder(track_descriptor); + + // Ensure that the track and its parents are resolved. This may start a new + // process and/or thread (i.e. new upid/utid). + TrackId track_id = *track_event_tracker_->GetDescriptorTrack( + decoder.uuid(), kNullStringId, packet_sequence_id); + + if (decoder.has_thread()) { + UniqueTid utid = ParseThreadDescriptor(decoder.thread()); + if (decoder.has_chrome_thread()) + ParseChromeThreadDescriptor(utid, decoder.chrome_thread()); + } else if (decoder.has_process()) { + UniquePid upid = + ParseProcessDescriptor(packet_timestamp, decoder.process()); + if (decoder.has_chrome_process()) + ParseChromeProcessDescriptor(upid, decoder.chrome_process()); + } else if (decoder.has_counter()) { + ParseCounterDescriptor(track_id, decoder.counter()); + } + + // Override the name with the most recent name seen (after sorting by ts). + if (decoder.has_name()) { + auto* tracks = context_->storage->mutable_track_table(); + StringId name_id = context_->storage->InternString(decoder.name()); + tracks->mutable_name()->Set(*tracks->id().IndexOf(track_id), name_id); + } +} + +UniquePid TrackEventParser::ParseProcessDescriptor( + int64_t packet_timestamp, + protozero::ConstBytes process_descriptor) { + protos::pbzero::ProcessDescriptor::Decoder decoder(process_descriptor); + UniquePid upid = context_->process_tracker->GetOrCreateProcess( + static_cast(decoder.pid())); + active_chrome_processes_tracker_.AddProcessDescriptor(packet_timestamp, upid); + if (decoder.has_process_name() && decoder.process_name().size) { + // Don't override system-provided names. 
+ context_->process_tracker->SetProcessNameIfUnset( + upid, context_->storage->InternString(decoder.process_name())); + } + if (decoder.has_start_timestamp_ns() && decoder.start_timestamp_ns() > 0) { + context_->process_tracker->SetStartTsIfUnset(upid, + decoder.start_timestamp_ns()); + } + // TODO(skyostil): Remove parsing for legacy chrome_process_type field. + if (decoder.has_chrome_process_type()) { + StringId name_id = + chrome_string_lookup_.GetProcessName(decoder.chrome_process_type()); + // Don't override system-provided names. + context_->process_tracker->SetProcessNameIfUnset(upid, name_id); + } + int label_index = 0; + for (auto it = decoder.process_labels(); it; it++) { + StringId label_id = context_->storage->InternString(*it); + std::string key = "chrome.process_label["; + key.append(std::to_string(label_index++)); + key.append("]"); + context_->process_tracker->AddArgsTo(upid).AddArg( + chrome_process_label_flat_key_id_, + context_->storage->InternString(base::StringView(key)), + Variadic::String(label_id)); + } + return upid; +} + +void TrackEventParser::ParseChromeProcessDescriptor( + UniquePid upid, + protozero::ConstBytes chrome_process_descriptor) { + protos::pbzero::ChromeProcessDescriptor::Decoder decoder( + chrome_process_descriptor); + + StringId name_id = + chrome_string_lookup_.GetProcessName(decoder.process_type()); + // Don't override system-provided names. + context_->process_tracker->SetProcessNameIfUnset(upid, name_id); + + ArgsTracker::BoundInserter process_args = + context_->process_tracker->AddArgsTo(upid); + // For identifying Chrome processes in system traces. 
+ process_args.AddArg(chrome_process_type_id_, Variadic::String(name_id)); + if (decoder.has_host_app_package_name()) { + process_args.AddArg(chrome_host_app_package_name_id_, + Variadic::String(context_->storage->InternString( + decoder.host_app_package_name()))); + } + if (decoder.has_crash_trace_id()) { + process_args.AddArg(chrome_crash_trace_id_name_id_, + Variadic::UnsignedInteger(decoder.crash_trace_id())); + } +} + +UniqueTid TrackEventParser::ParseThreadDescriptor( + protozero::ConstBytes thread_descriptor) { + protos::pbzero::ThreadDescriptor::Decoder decoder(thread_descriptor); + UniqueTid utid = context_->process_tracker->UpdateThread( + static_cast(decoder.tid()), + static_cast(decoder.pid())); + StringId name_id = kNullStringId; + if (decoder.has_thread_name() && decoder.thread_name().size) { + name_id = context_->storage->InternString(decoder.thread_name()); + } else if (decoder.has_chrome_thread_type()) { + // TODO(skyostil): Remove parsing for legacy chrome_thread_type field. 
+ name_id = chrome_string_lookup_.GetThreadName(decoder.chrome_thread_type()); + } + context_->process_tracker->UpdateThreadNameByUtid( + utid, name_id, ThreadNamePriority::kTrackDescriptor); + return utid; +} + +void TrackEventParser::ParseChromeThreadDescriptor( + UniqueTid utid, + protozero::ConstBytes chrome_thread_descriptor) { + protos::pbzero::ChromeThreadDescriptor::Decoder decoder( + chrome_thread_descriptor); + if (!decoder.has_thread_type()) + return; + + StringId name_id = chrome_string_lookup_.GetThreadName(decoder.thread_type()); + context_->process_tracker->UpdateThreadNameByUtid( + utid, name_id, ThreadNamePriority::kTrackDescriptorThreadType); +} + +void TrackEventParser::ParseCounterDescriptor( + TrackId track_id, + protozero::ConstBytes counter_descriptor) { + using protos::pbzero::CounterDescriptor; + + CounterDescriptor::Decoder decoder(counter_descriptor); + auto* counter_tracks = context_->storage->mutable_counter_track_table(); + + size_t unit_index = static_cast(decoder.unit()); + if (unit_index >= counter_unit_ids_.size()) + unit_index = CounterDescriptor::UNIT_UNSPECIFIED; + + auto opt_track_idx = counter_tracks->id().IndexOf(track_id); + if (!opt_track_idx) { + context_->storage->IncrementStats(stats::track_event_parser_errors); + return; + } + + auto track_idx = *opt_track_idx; + + switch (decoder.type()) { + case CounterDescriptor::COUNTER_UNSPECIFIED: + break; + case CounterDescriptor::COUNTER_THREAD_TIME_NS: + unit_index = CounterDescriptor::UNIT_TIME_NS; + counter_tracks->mutable_name()->Set(track_idx, + counter_name_thread_time_id_); + break; + case CounterDescriptor::COUNTER_THREAD_INSTRUCTION_COUNT: + unit_index = CounterDescriptor::UNIT_COUNT; + counter_tracks->mutable_name()->Set( + track_idx, counter_name_thread_instruction_count_id_); + break; + } + + counter_tracks->mutable_unit()->Set(track_idx, counter_unit_ids_[unit_index]); +} + +void TrackEventParser::ParseTrackEvent(int64_t ts, + const TrackEventData* event_data, + 
ConstBytes blob, + uint32_t packet_sequence_id) { + const auto range_of_interest_start_us = + track_event_tracker_->range_of_interest_start_us(); + if (context_->config.drop_track_event_data_before == + DropTrackEventDataBefore::kTrackEventRangeOfInterest && + range_of_interest_start_us && ts < *range_of_interest_start_us * 1000) { + // The event is outside of the range of interest, and dropping is enabled. + // So we drop the event. + context_->storage->IncrementStats( + stats::track_event_dropped_packets_outside_of_range_of_interest); + return; + } + util::Status status = + EventImporter(this, ts, event_data, std::move(blob), packet_sequence_id) + .Import(); + if (!status.ok()) { + context_->storage->IncrementStats(stats::track_event_parser_errors); + PERFETTO_DLOG("ParseTrackEvent error: %s", status.c_message()); + } +} + +void TrackEventParser::AddActiveProcess(int64_t packet_timestamp, int32_t pid) { + UniquePid upid = + context_->process_tracker->GetOrCreateProcess(static_cast(pid)); + active_chrome_processes_tracker_.AddActiveProcessMetadata(packet_timestamp, + upid); +} + +void TrackEventParser::NotifyEndOfFile() { + active_chrome_processes_tracker_.NotifyEndOfFile(); +} + +} // namespace trace_processor +} // namespace perfetto + +#endif diff --git a/kram-profile/kram-profile.xcodeproj/project.pbxproj b/kram-profile/kram-profile.xcodeproj/project.pbxproj new file mode 100644 index 00000000..fb29e9a0 --- /dev/null +++ b/kram-profile/kram-profile.xcodeproj/project.pbxproj @@ -0,0 +1,500 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 56; + objects = { + +/* Begin PBXBuildFile section */ + 705F68CD2B820AD100437FAA /* kram_profileApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68CC2B820AD100437FAA /* kram_profileApp.swift */; }; + 705F68D12B820AD200437FAA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 705F68D02B820AD200437FAA /* Assets.xcassets */; }; + 705F68D42B820AD200437FAA /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 705F68D32B820AD200437FAA /* Preview Assets.xcassets */; }; + 705F68E12B87EB8000437FAA /* AnyDecodable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68DE2B87EB8000437FAA /* AnyDecodable.swift */; }; + 705F68E22B87EB8000437FAA /* AnyCodable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68DF2B87EB8000437FAA /* AnyCodable.swift */; }; + 705F68E32B87EB8000437FAA /* AnyEncodable.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68E02B87EB8000437FAA /* AnyEncodable.swift */; }; + 705F68E52B89907700437FAA /* README.md in Resources */ = {isa = PBXBuildFile; fileRef = 705F68E42B89907700437FAA /* README.md */; }; + 705F68E72B8BEB7100437FAA /* DataCompression.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68E62B8BEB7000437FAA /* DataCompression.swift */; }; + 705F68E92B9451CC00437FAA /* Log.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68E82B9451CC00437FAA /* Log.swift */; }; + 705F68EB2B94E33800437FAA /* Keycode.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F68EA2B94E33800437FAA /* Keycode.swift */; }; + 705F69052BA2ED1300437FAA /* KramZipHelper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 705F68FF2BA2ED1200437FAA /* KramZipHelper.cpp */; }; + 705F69062BA2ED1300437FAA /* miniz.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 705F69002BA2ED1200437FAA /* miniz.cpp */; }; + 705F69072BA2ED1300437FAA /* track_event_parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 705F69042BA2ED1200437FAA /* 
track_event_parser.cpp */; }; + 705F690A2BA3801D00437FAA /* KramZipHelperW.mm in Sources */ = {isa = PBXBuildFile; fileRef = 705F69092BA3801D00437FAA /* KramZipHelperW.mm */; }; + 705F690C2BA3CDEC00437FAA /* File.swift in Sources */ = {isa = PBXBuildFile; fileRef = 705F690B2BA3CDEC00437FAA /* File.swift */; }; + 7061C76F2BAFB715003EC937 /* Utils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7061C7672BAFB715003EC937 /* Utils.cpp */; }; + 7061C7702BAFB715003EC937 /* Analysis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7061C7682BAFB715003EC937 /* Analysis.cpp */; }; + 7061C7712BAFB715003EC937 /* Arena.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7061C7692BAFB715003EC937 /* Arena.cpp */; }; + 7061C7722BAFB715003EC937 /* CBA.mm in Sources */ = {isa = PBXBuildFile; fileRef = 7061C76B2BAFB715003EC937 /* CBA.mm */; }; + 7061C7732BAFB715003EC937 /* BuildEvents.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7061C76C2BAFB715003EC937 /* BuildEvents.cpp */; }; + 7061C7762BAFC323003EC937 /* simdjson.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7061C7742BAFC323003EC937 /* simdjson.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 705F68C92B820AD100437FAA /* kram-profile.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "kram-profile.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 705F68CC2B820AD100437FAA /* kram_profileApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = kram_profileApp.swift; sourceTree = ""; }; + 705F68D02B820AD200437FAA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 705F68D32B820AD200437FAA /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 705F68D52B820AD200437FAA /* kram_profile.entitlements */ = {isa = PBXFileReference; 
lastKnownFileType = text.plist.entitlements; path = kram_profile.entitlements; sourceTree = ""; }; + 705F68DD2B86AB2000437FAA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist; path = Info.plist; sourceTree = ""; }; + 705F68DE2B87EB8000437FAA /* AnyDecodable.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyDecodable.swift; sourceTree = ""; }; + 705F68DF2B87EB8000437FAA /* AnyCodable.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyCodable.swift; sourceTree = ""; }; + 705F68E02B87EB8000437FAA /* AnyEncodable.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnyEncodable.swift; sourceTree = ""; }; + 705F68E42B89907700437FAA /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = SOURCE_ROOT; }; + 705F68E62B8BEB7000437FAA /* DataCompression.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DataCompression.swift; sourceTree = ""; }; + 705F68E82B9451CC00437FAA /* Log.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Log.swift; sourceTree = ""; }; + 705F68EA2B94E33800437FAA /* Keycode.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Keycode.swift; sourceTree = ""; }; + 705F68FE2BA2ED1200437FAA /* KramZipHelper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramZipHelper.h; sourceTree = ""; }; + 705F68FF2BA2ED1200437FAA /* KramZipHelper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = KramZipHelper.cpp; sourceTree = ""; }; + 705F69002BA2ED1200437FAA /* miniz.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = miniz.cpp; sourceTree = ""; }; + 
705F69022BA2ED1200437FAA /* miniz.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = miniz.h; sourceTree = ""; }; + 705F69032BA2ED1200437FAA /* kram-profile-Bridging-Header.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "kram-profile-Bridging-Header.h"; sourceTree = ""; }; + 705F69042BA2ED1200437FAA /* track_event_parser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = track_event_parser.cpp; sourceTree = ""; }; + 705F69082BA3801D00437FAA /* KramZipHelperW.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = KramZipHelperW.h; sourceTree = ""; }; + 705F69092BA3801D00437FAA /* KramZipHelperW.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = KramZipHelperW.mm; sourceTree = ""; }; + 705F690B2BA3CDEC00437FAA /* File.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = File.swift; sourceTree = ""; }; + 7061C7662BAFB715003EC937 /* Utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Utils.h; sourceTree = ""; }; + 7061C7672BAFB715003EC937 /* Utils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Utils.cpp; sourceTree = ""; }; + 7061C7682BAFB715003EC937 /* Analysis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Analysis.cpp; sourceTree = ""; }; + 7061C7692BAFB715003EC937 /* Arena.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Arena.cpp; sourceTree = ""; }; + 7061C76A2BAFB715003EC937 /* BuildEvents.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BuildEvents.h; sourceTree = ""; }; + 7061C76B2BAFB715003EC937 /* CBA.mm */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.cpp.objcpp; path = CBA.mm; sourceTree = ""; }; + 7061C76C2BAFB715003EC937 /* BuildEvents.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BuildEvents.cpp; sourceTree = ""; }; + 7061C76D2BAFB715003EC937 /* Arena.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Arena.h; sourceTree = ""; }; + 7061C76E2BAFB715003EC937 /* Analysis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Analysis.h; sourceTree = ""; }; + 7061C7742BAFC323003EC937 /* simdjson.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = simdjson.cpp; sourceTree = ""; }; + 7061C7752BAFC323003EC937 /* simdjson.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = simdjson.h; sourceTree = ""; }; + 7061C7772BAFD82E003EC937 /* CBA.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CBA.h; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 705F68C62B820AD100437FAA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 705F68C02B820AD100437FAA = { + isa = PBXGroup; + children = ( + 7061C7652BAFB715003EC937 /* CBA */, + 705F68FD2BA2ED1200437FAA /* Source */, + 705F68CB2B820AD100437FAA /* kram-profile */, + 705F68CA2B820AD100437FAA /* Products */, + ); + sourceTree = ""; + }; + 705F68CA2B820AD100437FAA /* Products */ = { + isa = PBXGroup; + children = ( + 705F68C92B820AD100437FAA /* kram-profile.app */, + ); + name = Products; + sourceTree = ""; + }; + 705F68CB2B820AD100437FAA /* kram-profile */ = { + isa = PBXGroup; + children = ( + 705F68DD2B86AB2000437FAA /* Info.plist */, + 705F68CC2B820AD100437FAA /* 
kram_profileApp.swift */, + 705F690B2BA3CDEC00437FAA /* File.swift */, + 705F68EA2B94E33800437FAA /* Keycode.swift */, + 705F68D02B820AD200437FAA /* Assets.xcassets */, + 705F68DF2B87EB8000437FAA /* AnyCodable.swift */, + 705F68DE2B87EB8000437FAA /* AnyDecodable.swift */, + 705F68E02B87EB8000437FAA /* AnyEncodable.swift */, + 705F68E62B8BEB7000437FAA /* DataCompression.swift */, + 705F68E82B9451CC00437FAA /* Log.swift */, + 705F68D52B820AD200437FAA /* kram_profile.entitlements */, + 705F68E42B89907700437FAA /* README.md */, + 705F68D22B820AD200437FAA /* Preview Content */, + ); + path = "kram-profile"; + sourceTree = ""; + }; + 705F68D22B820AD200437FAA /* Preview Content */ = { + isa = PBXGroup; + children = ( + 705F68D32B820AD200437FAA /* Preview Assets.xcassets */, + ); + path = "Preview Content"; + sourceTree = ""; + }; + 705F68FD2BA2ED1200437FAA /* Source */ = { + isa = PBXGroup; + children = ( + 705F69032BA2ED1200437FAA /* kram-profile-Bridging-Header.h */, + 705F68FE2BA2ED1200437FAA /* KramZipHelper.h */, + 705F68FF2BA2ED1200437FAA /* KramZipHelper.cpp */, + 705F69082BA3801D00437FAA /* KramZipHelperW.h */, + 705F69092BA3801D00437FAA /* KramZipHelperW.mm */, + 705F69022BA2ED1200437FAA /* miniz.h */, + 705F69002BA2ED1200437FAA /* miniz.cpp */, + 705F69042BA2ED1200437FAA /* track_event_parser.cpp */, + ); + path = Source; + sourceTree = ""; + }; + 7061C7652BAFB715003EC937 /* CBA */ = { + isa = PBXGroup; + children = ( + 7061C7662BAFB715003EC937 /* Utils.h */, + 7061C7672BAFB715003EC937 /* Utils.cpp */, + 7061C76D2BAFB715003EC937 /* Arena.h */, + 7061C7692BAFB715003EC937 /* Arena.cpp */, + 7061C76E2BAFB715003EC937 /* Analysis.h */, + 7061C7682BAFB715003EC937 /* Analysis.cpp */, + 7061C76A2BAFB715003EC937 /* BuildEvents.h */, + 7061C76C2BAFB715003EC937 /* BuildEvents.cpp */, + 7061C7772BAFD82E003EC937 /* CBA.h */, + 7061C76B2BAFB715003EC937 /* CBA.mm */, + 7061C7742BAFC323003EC937 /* simdjson.cpp */, + 7061C7752BAFC323003EC937 /* simdjson.h */, + ); + path = CBA; 
+ sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 705F68C82B820AD100437FAA /* kram-profile */ = { + isa = PBXNativeTarget; + buildConfigurationList = 705F68D82B820AD200437FAA /* Build configuration list for PBXNativeTarget "kram-profile" */; + buildPhases = ( + 705F68C52B820AD100437FAA /* Sources */, + 705F68C62B820AD100437FAA /* Frameworks */, + 705F68C72B820AD100437FAA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "kram-profile"; + productName = "kram-profile"; + productReference = 705F68C92B820AD100437FAA /* kram-profile.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 705F68C12B820AD100437FAA /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastSwiftUpdateCheck = 1520; + LastUpgradeCheck = 1520; + TargetAttributes = { + 705F68C82B820AD100437FAA = { + CreatedOnToolsVersion = 15.2; + LastSwiftMigration = 1520; + }; + }; + }; + buildConfigurationList = 705F68C42B820AD100437FAA /* Build configuration list for PBXProject "kram-profile" */; + compatibilityVersion = "Xcode 14.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 705F68C02B820AD100437FAA; + productRefGroup = 705F68CA2B820AD100437FAA /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 705F68C82B820AD100437FAA /* kram-profile */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 705F68C72B820AD100437FAA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 705F68E52B89907700437FAA /* README.md in Resources */, + 705F68D42B820AD200437FAA /* Preview Assets.xcassets in Resources */, + 705F68D12B820AD200437FAA /* Assets.xcassets in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase 
section */ + +/* Begin PBXSourcesBuildPhase section */ + 705F68C52B820AD100437FAA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 705F68E12B87EB8000437FAA /* AnyDecodable.swift in Sources */, + 705F690C2BA3CDEC00437FAA /* File.swift in Sources */, + 7061C7712BAFB715003EC937 /* Arena.cpp in Sources */, + 705F68E32B87EB8000437FAA /* AnyEncodable.swift in Sources */, + 705F68E92B9451CC00437FAA /* Log.swift in Sources */, + 705F68EB2B94E33800437FAA /* Keycode.swift in Sources */, + 705F69062BA2ED1300437FAA /* miniz.cpp in Sources */, + 705F69052BA2ED1300437FAA /* KramZipHelper.cpp in Sources */, + 705F68E22B87EB8000437FAA /* AnyCodable.swift in Sources */, + 7061C7732BAFB715003EC937 /* BuildEvents.cpp in Sources */, + 7061C7762BAFC323003EC937 /* simdjson.cpp in Sources */, + 705F69072BA2ED1300437FAA /* track_event_parser.cpp in Sources */, + 7061C76F2BAFB715003EC937 /* Utils.cpp in Sources */, + 705F68CD2B820AD100437FAA /* kram_profileApp.swift in Sources */, + 705F68E72B8BEB7100437FAA /* DataCompression.swift in Sources */, + 705F690A2BA3801D00437FAA /* KramZipHelperW.mm in Sources */, + 7061C7722BAFB715003EC937 /* CBA.mm in Sources */, + 7061C7702BAFB715003EC937 /* Analysis.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 705F68D62B820AD200437FAA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_GCD_PERFORMANCE = YES; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_ANALYZER_OSOBJECT_C_STYLE_CAST = YES; + CLANG_ANALYZER_SECURITY_FLOATLOOPCOUNTER = YES; + CLANG_ANALYZER_SECURITY_INSECUREAPI_RAND = YES; + CLANG_ANALYZER_SECURITY_INSECUREAPI_STRCPY = YES; + 
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_CFLAGS = "-ftime-trace"; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)"; + SWIFT_OBJC_INTEROP_MODE = objc; + SWIFT_OPTIMIZATION_LEVEL = 
"-Onone"; + SWIFT_VERSION = 6.0; + }; + name = Debug; + }; + 705F68D72B820AD200437FAA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ARCHS = arm64; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_GCD_PERFORMANCE = YES; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_ANALYZER_OSOBJECT_C_STYLE_CAST = YES; + CLANG_ANALYZER_SECURITY_FLOATLOOPCOUNTER = YES; + CLANG_ANALYZER_SECURITY_INSECUREAPI_RAND = YES; + CLANG_ANALYZER_SECURITY_INSECUREAPI_STRCPY = YES; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEAD_CODE_STRIPPING = YES; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_NO_COMMON_BLOCKS = YES; + 
GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MACOSX_DEPLOYMENT_TARGET = 13.0; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + OTHER_CFLAGS = "-ftime-trace"; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = ""; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OBJC_INTEROP_MODE = objc; + SWIFT_VERSION = 6.0; + }; + name = Release; + }; + 705F68D92B820AD200437FAA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CLANG_CXX_STANDARD_LIBRARY_HARDENING = none; + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_ENTITLEMENTS = "kram-profile/kram_profile.entitlements"; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Developer ID Application"; + CODE_SIGN_STYLE = Manual; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"kram-profile/Preview Content\""; + DEVELOPMENT_TEAM = ""; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; + ENABLE_APP_SANDBOX = YES; + ENABLE_HARDENED_RUNTIME = NO; + ENABLE_PREVIEWS = YES; + ENABLE_USER_SELECTED_FILES = readonly; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_FILE = "kram-profile/Info.plist"; + INFOPLIST_KEY_CFBundleDisplayName = "Kram Profiler"; + INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kram-profile"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = 
KramProfilerProvision; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_OBJC_BRIDGING_HEADER = "Source/kram-profile-Bridging-Header.h"; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 705F68DA2B820AD200437FAA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CLANG_CXX_STANDARD_LIBRARY_HARDENING = fast; + CLANG_ENABLE_MODULES = YES; + CODE_SIGN_ENTITLEMENTS = "kram-profile/kram_profile.entitlements"; + CODE_SIGN_IDENTITY = "Developer ID Application: Alec Miller (LDJ95E4NS8)"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Developer ID Application"; + CODE_SIGN_STYLE = Manual; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"kram-profile/Preview Content\""; + DEVELOPMENT_TEAM = ""; + "DEVELOPMENT_TEAM[sdk=macosx*]" = LDJ95E4NS8; + ENABLE_APP_SANDBOX = YES; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + ENABLE_USER_SELECTED_FILES = readonly; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_FILE = "kram-profile/Info.plist"; + INFOPLIST_KEY_CFBundleDisplayName = "Kram Profiler"; + INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.developer-tools"; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = "com.hialec.kram-profile"; + PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ""; + "PROVISIONING_PROFILE_SPECIFIER[sdk=macosx*]" = KramProfilerProvision; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_OBJC_BRIDGING_HEADER = "Source/kram-profile-Bridging-Header.h"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 705F68C42B820AD100437FAA /* Build configuration list for PBXProject "kram-profile" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 
705F68D62B820AD200437FAA /* Debug */, + 705F68D72B820AD200437FAA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 705F68D82B820AD200437FAA /* Build configuration list for PBXNativeTarget "kram-profile" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 705F68D92B820AD200437FAA /* Debug */, + 705F68DA2B820AD200437FAA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 705F68C12B820AD100437FAA /* Project object */; +} diff --git a/kram-profile/kram-profile/AnyCodable.swift b/kram-profile/kram-profile/AnyCodable.swift new file mode 100644 index 00000000..5f9924d9 --- /dev/null +++ b/kram-profile/kram-profile/AnyCodable.swift @@ -0,0 +1,147 @@ +import Foundation +/** + A type-erased `Codable` value. + + The `AnyCodable` type forwards encoding and decoding responsibilities + to an underlying value, hiding its specific underlying type. + + You can encode or decode mixed-type values in dictionaries + and other collections that require `Encodable` or `Decodable` conformance + by declaring their contained type to be `AnyCodable`. + + - SeeAlso: `AnyEncodable` + - SeeAlso: `AnyDecodable` + */ +@frozen public struct AnyCodable: Codable { + public let value: Any + + public init(_ value: T?) { + self.value = value ?? 
() + } +} + +extension AnyCodable: _AnyEncodable, _AnyDecodable {} + +extension AnyCodable: Equatable { + public static func == (lhs: AnyCodable, rhs: AnyCodable) -> Bool { + switch (lhs.value, rhs.value) { + case is (Void, Void): + return true + case let (lhs as Bool, rhs as Bool): + return lhs == rhs + case let (lhs as Int, rhs as Int): + return lhs == rhs + case let (lhs as Int8, rhs as Int8): + return lhs == rhs + case let (lhs as Int16, rhs as Int16): + return lhs == rhs + case let (lhs as Int32, rhs as Int32): + return lhs == rhs + case let (lhs as Int64, rhs as Int64): + return lhs == rhs + case let (lhs as UInt, rhs as UInt): + return lhs == rhs + case let (lhs as UInt8, rhs as UInt8): + return lhs == rhs + case let (lhs as UInt16, rhs as UInt16): + return lhs == rhs + case let (lhs as UInt32, rhs as UInt32): + return lhs == rhs + case let (lhs as UInt64, rhs as UInt64): + return lhs == rhs + case let (lhs as Float, rhs as Float): + return lhs == rhs + case let (lhs as Double, rhs as Double): + return lhs == rhs + case let (lhs as String, rhs as String): + return lhs == rhs + case let (lhs as [String: AnyCodable], rhs as [String: AnyCodable]): + return lhs == rhs + case let (lhs as [AnyCodable], rhs as [AnyCodable]): + return lhs == rhs + case let (lhs as [String: Any], rhs as [String: Any]): + return NSDictionary(dictionary: lhs) == NSDictionary(dictionary: rhs) + case let (lhs as [Any], rhs as [Any]): + return NSArray(array: lhs) == NSArray(array: rhs) + case is (NSNull, NSNull): + return true + default: + return false + } + } +} + +extension AnyCodable: CustomStringConvertible { + public var description: String { + switch value { + case is Void: + return String(describing: nil as Any?) 
+ case let value as CustomStringConvertible: + return value.description + default: + return String(describing: value) + } + } +} + +extension AnyCodable: CustomDebugStringConvertible { + public var debugDescription: String { + switch value { + case let value as CustomDebugStringConvertible: + return "AnyCodable(\(value.debugDescription))" + default: + return "AnyCodable(\(description))" + } + } +} + +extension AnyCodable: ExpressibleByNilLiteral {} +extension AnyCodable: ExpressibleByBooleanLiteral {} +extension AnyCodable: ExpressibleByIntegerLiteral {} +extension AnyCodable: ExpressibleByFloatLiteral {} +extension AnyCodable: ExpressibleByStringLiteral {} +extension AnyCodable: ExpressibleByStringInterpolation {} +extension AnyCodable: ExpressibleByArrayLiteral {} +extension AnyCodable: ExpressibleByDictionaryLiteral {} + + +extension AnyCodable: Hashable { + public func hash(into hasher: inout Hasher) { + switch value { + case let value as Bool: + hasher.combine(value) + case let value as Int: + hasher.combine(value) + case let value as Int8: + hasher.combine(value) + case let value as Int16: + hasher.combine(value) + case let value as Int32: + hasher.combine(value) + case let value as Int64: + hasher.combine(value) + case let value as UInt: + hasher.combine(value) + case let value as UInt8: + hasher.combine(value) + case let value as UInt16: + hasher.combine(value) + case let value as UInt32: + hasher.combine(value) + case let value as UInt64: + hasher.combine(value) + case let value as Float: + hasher.combine(value) + case let value as Double: + hasher.combine(value) + case let value as String: + hasher.combine(value) + case let value as [String: AnyCodable]: + hasher.combine(value) + case let value as [AnyCodable]: + hasher.combine(value) + default: + break + } + } +} diff --git a/kram-profile/kram-profile/AnyDecodable.swift b/kram-profile/kram-profile/AnyDecodable.swift new file mode 100644 index 00000000..9b422280 --- /dev/null +++ 
b/kram-profile/kram-profile/AnyDecodable.swift @@ -0,0 +1,188 @@ +#if canImport(Foundation) +import Foundation +#endif + +/** + A type-erased `Decodable` value. + + The `AnyDecodable` type forwards decoding responsibilities + to an underlying value, hiding its specific underlying type. + + You can decode mixed-type values in dictionaries + and other collections that require `Decodable` conformance + by declaring their contained type to be `AnyDecodable`: + + let json = """ + { + "boolean": true, + "integer": 42, + "double": 3.141592653589793, + "string": "string", + "array": [1, 2, 3], + "nested": { + "a": "alpha", + "b": "bravo", + "c": "charlie" + }, + "null": null + } + """.data(using: .utf8)! + + let decoder = JSONDecoder() + let dictionary = try! decoder.decode([String: AnyDecodable].self, from: json) + */ +@frozen public struct AnyDecodable: Decodable { + public let value: Any + + public init(_ value: T?) { + self.value = value ?? () + } +} + +@usableFromInline +protocol _AnyDecodable { + var value: Any { get } + init(_ value: T?) +} + +extension AnyDecodable: _AnyDecodable {} + +extension _AnyDecodable { + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + + if container.decodeNil() { + #if canImport(Foundation) + self.init(NSNull()) + #else + self.init(Optional.none) + #endif + } else if let bool = try? container.decode(Bool.self) { + self.init(bool) + } else if let int = try? container.decode(Int.self) { + self.init(int) + } else if let uint = try? container.decode(UInt.self) { + self.init(uint) + } else if let double = try? container.decode(Double.self) { + self.init(double) + } else if let string = try? container.decode(String.self) { + self.init(string) + } else if let array = try? container.decode([AnyDecodable].self) { + self.init(array.map { $0.value }) + } else if let dictionary = try? 
container.decode([String: AnyDecodable].self) { + self.init(dictionary.mapValues { $0.value }) + } else { + throw DecodingError.dataCorruptedError(in: container, debugDescription: "AnyDecodable value cannot be decoded") + } + } +} + +extension AnyDecodable: Equatable { + public static func == (lhs: AnyDecodable, rhs: AnyDecodable) -> Bool { + switch (lhs.value, rhs.value) { +#if canImport(Foundation) + case is (NSNull, NSNull), is (Void, Void): + return true +#endif + case let (lhs as Bool, rhs as Bool): + return lhs == rhs + case let (lhs as Int, rhs as Int): + return lhs == rhs + case let (lhs as Int8, rhs as Int8): + return lhs == rhs + case let (lhs as Int16, rhs as Int16): + return lhs == rhs + case let (lhs as Int32, rhs as Int32): + return lhs == rhs + case let (lhs as Int64, rhs as Int64): + return lhs == rhs + case let (lhs as UInt, rhs as UInt): + return lhs == rhs + case let (lhs as UInt8, rhs as UInt8): + return lhs == rhs + case let (lhs as UInt16, rhs as UInt16): + return lhs == rhs + case let (lhs as UInt32, rhs as UInt32): + return lhs == rhs + case let (lhs as UInt64, rhs as UInt64): + return lhs == rhs + case let (lhs as Float, rhs as Float): + return lhs == rhs + case let (lhs as Double, rhs as Double): + return lhs == rhs + case let (lhs as String, rhs as String): + return lhs == rhs + case let (lhs as [String: AnyDecodable], rhs as [String: AnyDecodable]): + return lhs == rhs + case let (lhs as [AnyDecodable], rhs as [AnyDecodable]): + return lhs == rhs + default: + return false + } + } +} + +extension AnyDecodable: CustomStringConvertible { + public var description: String { + switch value { + case is Void: + return String(describing: nil as Any?) 
+ case let value as CustomStringConvertible: + return value.description + default: + return String(describing: value) + } + } +} + +extension AnyDecodable: CustomDebugStringConvertible { + public var debugDescription: String { + switch value { + case let value as CustomDebugStringConvertible: + return "AnyDecodable(\(value.debugDescription))" + default: + return "AnyDecodable(\(description))" + } + } +} + +extension AnyDecodable: Hashable { + public func hash(into hasher: inout Hasher) { + switch value { + case let value as Bool: + hasher.combine(value) + case let value as Int: + hasher.combine(value) + case let value as Int8: + hasher.combine(value) + case let value as Int16: + hasher.combine(value) + case let value as Int32: + hasher.combine(value) + case let value as Int64: + hasher.combine(value) + case let value as UInt: + hasher.combine(value) + case let value as UInt8: + hasher.combine(value) + case let value as UInt16: + hasher.combine(value) + case let value as UInt32: + hasher.combine(value) + case let value as UInt64: + hasher.combine(value) + case let value as Float: + hasher.combine(value) + case let value as Double: + hasher.combine(value) + case let value as String: + hasher.combine(value) + case let value as [String: AnyDecodable]: + hasher.combine(value) + case let value as [AnyDecodable]: + hasher.combine(value) + default: + break + } + } +} diff --git a/kram-profile/kram-profile/AnyEncodable.swift b/kram-profile/kram-profile/AnyEncodable.swift new file mode 100644 index 00000000..d5530e57 --- /dev/null +++ b/kram-profile/kram-profile/AnyEncodable.swift @@ -0,0 +1,291 @@ +#if canImport(Foundation) +import Foundation +#endif + +/** + A type-erased `Encodable` value. + + The `AnyEncodable` type forwards encoding responsibilities + to an underlying value, hiding its specific underlying type. 
+ + You can encode mixed-type values in dictionaries + and other collections that require `Encodable` conformance + by declaring their contained type to be `AnyEncodable`: + + let dictionary: [String: AnyEncodable] = [ + "boolean": true, + "integer": 42, + "double": 3.141592653589793, + "string": "string", + "array": [1, 2, 3], + "nested": [ + "a": "alpha", + "b": "bravo", + "c": "charlie" + ], + "null": nil + ] + + let encoder = JSONEncoder() + let json = try! encoder.encode(dictionary) + */ +@frozen public struct AnyEncodable: Encodable { + public let value: Any + + public init(_ value: T?) { + self.value = value ?? () + } +} + +@usableFromInline +protocol _AnyEncodable { + var value: Any { get } + init(_ value: T?) +} + +extension AnyEncodable: _AnyEncodable {} + +// MARK: - Encodable + +extension _AnyEncodable { + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + + switch value { + #if canImport(Foundation) + case is NSNull: + try container.encodeNil() + #endif + case is Void: + try container.encodeNil() + case let bool as Bool: + try container.encode(bool) + case let int as Int: + try container.encode(int) + case let int8 as Int8: + try container.encode(int8) + case let int16 as Int16: + try container.encode(int16) + case let int32 as Int32: + try container.encode(int32) + case let int64 as Int64: + try container.encode(int64) + case let uint as UInt: + try container.encode(uint) + case let uint8 as UInt8: + try container.encode(uint8) + case let uint16 as UInt16: + try container.encode(uint16) + case let uint32 as UInt32: + try container.encode(uint32) + case let uint64 as UInt64: + try container.encode(uint64) + case let float as Float: + try container.encode(float) + case let double as Double: + try container.encode(double) + case let string as String: + try container.encode(string) + #if canImport(Foundation) + case let number as NSNumber: + try encode(nsnumber: number, into: &container) + case let date as 
Date: + try container.encode(date) + case let url as URL: + try container.encode(url) + #endif + case let array as [Any?]: + try container.encode(array.map { AnyEncodable($0) }) + case let dictionary as [String: Any?]: + try container.encode(dictionary.mapValues { AnyEncodable($0) }) + case let encodable as Encodable: + try encodable.encode(to: encoder) + default: + let context = EncodingError.Context(codingPath: container.codingPath, debugDescription: "AnyEncodable value cannot be encoded") + throw EncodingError.invalidValue(value, context) + } + } + + #if canImport(Foundation) + private func encode(nsnumber: NSNumber, into container: inout SingleValueEncodingContainer) throws { + switch Character(Unicode.Scalar(UInt8(nsnumber.objCType.pointee))) { + case "B": + try container.encode(nsnumber.boolValue) + case "c": + try container.encode(nsnumber.int8Value) + case "s": + try container.encode(nsnumber.int16Value) + case "i", "l": + try container.encode(nsnumber.int32Value) + case "q": + try container.encode(nsnumber.int64Value) + case "C": + try container.encode(nsnumber.uint8Value) + case "S": + try container.encode(nsnumber.uint16Value) + case "I", "L": + try container.encode(nsnumber.uint32Value) + case "Q": + try container.encode(nsnumber.uint64Value) + case "f": + try container.encode(nsnumber.floatValue) + case "d": + try container.encode(nsnumber.doubleValue) + default: + let context = EncodingError.Context(codingPath: container.codingPath, debugDescription: "NSNumber cannot be encoded because its type is not handled") + throw EncodingError.invalidValue(nsnumber, context) + } + } + #endif +} + +extension AnyEncodable: Equatable { + public static func == (lhs: AnyEncodable, rhs: AnyEncodable) -> Bool { + switch (lhs.value, rhs.value) { + case is (Void, Void): + return true + case let (lhs as Bool, rhs as Bool): + return lhs == rhs + case let (lhs as Int, rhs as Int): + return lhs == rhs + case let (lhs as Int8, rhs as Int8): + return lhs == rhs + case let (lhs 
as Int16, rhs as Int16): + return lhs == rhs + case let (lhs as Int32, rhs as Int32): + return lhs == rhs + case let (lhs as Int64, rhs as Int64): + return lhs == rhs + case let (lhs as UInt, rhs as UInt): + return lhs == rhs + case let (lhs as UInt8, rhs as UInt8): + return lhs == rhs + case let (lhs as UInt16, rhs as UInt16): + return lhs == rhs + case let (lhs as UInt32, rhs as UInt32): + return lhs == rhs + case let (lhs as UInt64, rhs as UInt64): + return lhs == rhs + case let (lhs as Float, rhs as Float): + return lhs == rhs + case let (lhs as Double, rhs as Double): + return lhs == rhs + case let (lhs as String, rhs as String): + return lhs == rhs + case let (lhs as [String: AnyEncodable], rhs as [String: AnyEncodable]): + return lhs == rhs + case let (lhs as [AnyEncodable], rhs as [AnyEncodable]): + return lhs == rhs + default: + return false + } + } +} + +extension AnyEncodable: CustomStringConvertible { + public var description: String { + switch value { + case is Void: + return String(describing: nil as Any?) + case let value as CustomStringConvertible: + return value.description + default: + return String(describing: value) + } + } +} + +extension AnyEncodable: CustomDebugStringConvertible { + public var debugDescription: String { + switch value { + case let value as CustomDebugStringConvertible: + return "AnyEncodable(\(value.debugDescription))" + default: + return "AnyEncodable(\(description))" + } + } +} + +extension AnyEncodable: ExpressibleByNilLiteral {} +extension AnyEncodable: ExpressibleByBooleanLiteral {} +extension AnyEncodable: ExpressibleByIntegerLiteral {} +extension AnyEncodable: ExpressibleByFloatLiteral {} +extension AnyEncodable: ExpressibleByStringLiteral {} +extension AnyEncodable: ExpressibleByStringInterpolation {} +extension AnyEncodable: ExpressibleByArrayLiteral {} +extension AnyEncodable: ExpressibleByDictionaryLiteral {} + +extension _AnyEncodable { + public init(nilLiteral _: ()) { + self.init(nil as Any?) 
/// Hashing for `AnyEncodable`, driven by the dynamic type of the wrapped value.
extension AnyEncodable: Hashable {
    /// Feeds the wrapped value into `hasher` when it is one of the supported
    /// scalar, string, dictionary or array types. Any other payload
    /// contributes nothing to the hash.
    ///
    /// The casts are attempted in a fixed order; the first one that succeeds wins.
    public func hash(into hasher: inout Hasher) {
        if let flag = value as? Bool {
            hasher.combine(flag)
        } else if let number = value as? Int {
            hasher.combine(number)
        } else if let number = value as? Int8 {
            hasher.combine(number)
        } else if let number = value as? Int16 {
            hasher.combine(number)
        } else if let number = value as? Int32 {
            hasher.combine(number)
        } else if let number = value as? Int64 {
            hasher.combine(number)
        } else if let number = value as? UInt {
            hasher.combine(number)
        } else if let number = value as? UInt8 {
            hasher.combine(number)
        } else if let number = value as? UInt16 {
            hasher.combine(number)
        } else if let number = value as? UInt32 {
            hasher.combine(number)
        } else if let number = value as? UInt64 {
            hasher.combine(number)
        } else if let number = value as? Float {
            hasher.combine(number)
        } else if let number = value as? Double {
            hasher.combine(number)
        } else if let text = value as? String {
            hasher.combine(text)
        } else if let table = value as? [String: AnyEncodable] {
            hasher.combine(table)
        } else if let list = value as? [AnyEncodable] {
            hasher.combine(list)
        }
    }
}
"version" : 1 + } +} diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Contents.json b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000..7a43aac9 --- /dev/null +++ b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,67 @@ +{ + "images" : [ + { + "idiom" : "mac", + "scale" : "1x", + "size" : "16x16" + }, + { + "filename" : "Icon-32 1.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "16x16" + }, + { + "filename" : "Icon-32.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "32x32" + }, + { + "filename" : "Icon-64.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "32x32" + }, + { + "filename" : "Icon-128.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "128x128" + }, + { + "filename" : "Icon-256 1.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "128x128" + }, + { + "filename" : "Icon-256.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "256x256" + }, + { + "filename" : "Icon-512 1.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "256x256" + }, + { + "filename" : "Icon-512.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "512x512" + }, + { + "filename" : "Icon-1024.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "512x512" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-1024.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-1024.png new file mode 100644 index 00000000..920a6bed Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-1024.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-128.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-128.png new file mode 100644 index 00000000..f270ebd2 Binary files /dev/null and 
b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-128.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png new file mode 100644 index 00000000..a4dc172e Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256.png new file mode 100644 index 00000000..a4dc172e Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-256.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png new file mode 100644 index 00000000..8ef2897e Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32.png new file mode 100644 index 00000000..8ef2897e Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-32.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png new file mode 100644 index 00000000..46d1b727 Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512.png new file mode 100644 index 00000000..46d1b727 Binary files /dev/null and 
b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-512.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-64.png b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-64.png new file mode 100644 index 00000000..0957f7bd Binary files /dev/null and b/kram-profile/kram-profile/Assets.xcassets/AppIcon.appiconset/Icon-64.png differ diff --git a/kram-profile/kram-profile/Assets.xcassets/Contents.json b/kram-profile/kram-profile/Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/kram-profile/kram-profile/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/kram-profile/kram-profile/DataCompression.swift b/kram-profile/kram-profile/DataCompression.swift new file mode 100644 index 00000000..8bc0ea1d --- /dev/null +++ b/kram-profile/kram-profile/DataCompression.swift @@ -0,0 +1,513 @@ +/// +/// DataCompression +/// +/// A libcompression wrapper as an extension for the `Data` type +/// (GZIP, ZLIB, LZFSE, LZMA, LZ4, deflate, RFC-1950, RFC-1951, RFC-1952) +/// +/// Created by Markus Wanke, 2016/12/05 +/// + + +/// +/// Apache License, Version 2.0 +/// +/// Copyright 2016, Markus Wanke +/// https://github.com/mw99/DataCompression?tab=readme-ov-file +/// +/// Licensed under the Apache License, Version 2.0 (the "License"); +/// you may not use this file except in compliance with the License. +/// You may obtain a copy of the License at +/// +/// http://www.apache.org/licenses/LICENSE-2.0 +/// +/// Unless required by applicable law or agreed to in writing, software +/// distributed under the License is distributed on an "AS IS" BASIS, +/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +/// See the License for the specific language governing permissions and +/// limitations under the License. 
/// libcompression-backed helpers on `Data`: generic compress/decompress plus
/// raw deflate (RFC-1951), zlib (RFC-1950) and gzip (RFC-1952) framing, and
/// Adler32/CRC32 checksums.
public extension Data
{
    /// Compresses the data.
    /// - parameter withAlgorithm: Compression algorithm to use. See the `CompressionAlgorithm` type
    /// - returns: compressed data, or nil if the compression stream reports an error
    func compress(withAlgorithm algo: CompressionAlgorithm) -> Data?
    {
        return self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: algo.lowLevelType)
            return perform(config, source: sourcePtr, sourceSize: count)
        }
    }

    /// Decompresses the data.
    /// - parameter withAlgorithm: Compression algorithm to use. See the `CompressionAlgorithm` type
    /// - returns: decompressed data, or nil if the input is empty or cannot be decoded
    func decompress(withAlgorithm algo: CompressionAlgorithm) -> Data?
    {
        return self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_DECODE, algorithm: algo.lowLevelType)
            return perform(config, source: sourcePtr, sourceSize: count)
        }
    }

    /// Please consider the [libcompression documentation](https://developer.apple.com/reference/compression/1665429-data_compression)
    /// for further details. Short info:
    /// zlib  : Aka deflate. Fast with a good compression rate. Proved itself over time and is supported everywhere.
    /// lzfse : Apple's custom Lempel-Ziv style compression algorithm. Claims to compress as well as zlib but 2 to 3 times faster.
    /// lzma  : Horribly slow. Compression as well as decompression. Compresses better than zlib though.
    /// lz4   : Fast, but compression rate is very bad. Apple's lz4 implementation often does not compress at all.
    enum CompressionAlgorithm
    {
        case zlib
        case lzfse
        case lzma
        case lz4
    }

    /// Compresses the data using the zlib deflate algorithm.
    /// - returns: raw deflated data according to [RFC-1951](https://tools.ietf.org/html/rfc1951).
    /// - note: Fixed at compression level 5 (best trade off between speed and time)
    func deflate() -> Data?
    {
        return self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: sourcePtr, sourceSize: count)
        }
    }

    /// Decompresses the data using the zlib deflate algorithm. Self is expected to be a raw deflate
    /// stream according to [RFC-1951](https://tools.ietf.org/html/rfc1951).
    /// - returns: uncompressed data
    func inflate() -> Data?
    {
        return self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_DECODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: sourcePtr, sourceSize: count)
        }
    }

    /// Compresses the data using the deflate algorithm and makes it comply to the zlib format.
    /// - returns: deflated data in zlib format [RFC-1950](https://tools.ietf.org/html/rfc1950)
    /// - note: Fixed at compression level 5 (best trade off between speed and time)
    func zlib() -> Data?
    {
        // 2 byte zlib header that is preloaded in front of the deflate stream
        let header = Data([0x78, 0x5e])

        let deflated = self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: sourcePtr, sourceSize: count, preload: header)
        }

        guard var result = deflated else { return nil }

        // trailer: big endian adler32 of the uncompressed data
        var adler = self.adler32().checksum.bigEndian
        result.append(Data(bytes: &adler, count: MemoryLayout<UInt32>.size))

        return result
    }

    /// Decompresses the data using the zlib deflate algorithm. Self is expected to be a zlib deflate
    /// stream according to [RFC-1950](https://tools.ietf.org/html/rfc1950).
    /// - parameter skipCheckSumValidation: when true (the default) the trailing adler32 is not verified
    /// - returns: uncompressed data, or nil if the header is invalid, inflation fails,
    ///            or the checksum is validated and does not match
    func unzlib(skipCheckSumValidation: Bool = true) -> Data?
    {
        // 2 byte header + 4 byte adler32 checksum
        let overhead = 6
        guard count > overhead else { return nil }

        let header: UInt16 = withUnsafeBytes { (ptr: UnsafePointer<UInt16>) -> UInt16 in
            return ptr.pointee.bigEndian
        }

        // check for the deflate stream bit
        guard header >> 8 & 0b1111 == 0b1000 else { return nil }
        // check the header checksum
        guard header % 31 == 0 else { return nil }

        let cresult: Data? = withUnsafeBytes { (ptr: UnsafePointer<UInt8>) -> Data? in
            let source = ptr.advanced(by: 2)
            let config = (operation: COMPRESSION_STREAM_DECODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: source, sourceSize: count - overhead)
        }

        guard let inflated = cresult else { return nil }

        if skipCheckSumValidation { return inflated }

        // suffix(4) takes a length, so this also works when self is a slice whose
        // startIndex is not 0 (suffix(from:) expects an index, not an offset).
        let cksum = Data(self.suffix(4)).withUnsafeBytes { rawPtr in
            return rawPtr.load(as: UInt32.self).bigEndian
        }

        return cksum == inflated.adler32().checksum ? inflated : nil
    }

    /// Compresses the data using the deflate algorithm and makes it comply to the gzip stream format.
    /// - returns: deflated data in gzip format [RFC-1952](https://tools.ietf.org/html/rfc1952)
    /// - note: Fixed at compression level 5 (best trade off between speed and time)
    func gzip() -> Data?
    {
        var header = Data([0x1f, 0x8b, 0x08, 0x00]) // magic, magic, deflate, noflags

        var unixtime = UInt32(Date().timeIntervalSince1970).littleEndian
        header.append(Data(bytes: &unixtime, count: MemoryLayout<UInt32>.size))

        header.append(contentsOf: [0x00, 0x03]) // normal compression level, unix file type

        let deflated = self.withUnsafeBytes { (sourcePtr: UnsafePointer<UInt8>) -> Data? in
            let config = (operation: COMPRESSION_STREAM_ENCODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: sourcePtr, sourceSize: count, preload: header)
        }

        guard var result = deflated else { return nil }

        // append checksum
        var crc32: UInt32 = self.crc32().checksum.littleEndian
        result.append(Data(bytes: &crc32, count: MemoryLayout<UInt32>.size))

        // append size of original data
        var isize: UInt32 = UInt32(truncatingIfNeeded: count).littleEndian
        result.append(Data(bytes: &isize, count: MemoryLayout<UInt32>.size))

        return result
    }

    /// Decompresses the data using the gzip deflate algorithm. Self is expected to be a gzip deflate
    /// stream according to [RFC-1952](https://tools.ietf.org/html/rfc1952).
    /// - returns: uncompressed data, or nil if the header is invalid, inflation fails,
    ///            or the footer size/crc32 do not match the inflated data
    func gunzip() -> Data?
    {
        // 10 byte header + data + 8 byte footer. See https://tools.ietf.org/html/rfc1952#section-2
        let overhead = 10 + 8
        guard count >= overhead else { return nil }

        typealias GZipHeader = (id1: UInt8, id2: UInt8, cm: UInt8, flg: UInt8, xfl: UInt8, os: UInt8)
        let hdr: GZipHeader = withUnsafeBytes { (ptr: UnsafePointer<UInt8>) -> GZipHeader in
            // +---+---+---+---+---+---+---+---+---+---+
            // |ID1|ID2|CM |FLG|     MTIME     |XFL|OS |
            // +---+---+---+---+---+---+---+---+---+---+
            return (id1: ptr[0], id2: ptr[1], cm: ptr[2], flg: ptr[3], xfl: ptr[8], os: ptr[9])
        }

        typealias GZipFooter = (crc32: UInt32, isize: UInt32)
        // Copy the last 8 bytes into their own Data before reading them as UInt32.
        // suffix(8) takes a length, so this also works when self is a slice whose
        // startIndex is not 0 (suffix(from:) expects an index, not an offset).
        let alignedFtr = Data(self.suffix(8))
        let ftr: GZipFooter = alignedFtr.withUnsafeBytes { (ptr: UnsafePointer<UInt32>) -> GZipFooter in
            // +---+---+---+---+---+---+---+---+
            // |     CRC32     |     ISIZE     |
            // +---+---+---+---+---+---+---+---+
            return (ptr[0].littleEndian, ptr[1].littleEndian)
        }

        // Wrong gzip magic or unsupported compression method
        guard hdr.id1 == 0x1f && hdr.id2 == 0x8b && hdr.cm == 0x08 else { return nil }

        let has_crc16: Bool = hdr.flg & 0b00010 != 0
        let has_extra: Bool = hdr.flg & 0b00100 != 0
        let has_fname: Bool = hdr.flg & 0b01000 != 0
        let has_cmmnt: Bool = hdr.flg & 0b10000 != 0

        let cresult: Data? = withUnsafeBytes { (ptr: UnsafePointer<UInt8>) -> Data? in
            // pos walks over the optional header fields; limit is where the footer starts
            var pos = 10 ; let limit = count - 8

            if has_extra {
                pos += ptr.advanced(by: pos).withMemoryRebound(to: UInt16.self, capacity: 1) {
                    return Int($0.pointee.littleEndian) + 2 // +2 for xlen
                }
            }
            if has_fname {
                while pos < limit && ptr[pos] != 0x0 { pos += 1 }
                pos += 1 // skip null byte as well
            }
            if has_cmmnt {
                while pos < limit && ptr[pos] != 0x0 { pos += 1 }
                pos += 1 // skip null byte as well
            }
            if has_crc16 {
                pos += 2 // ignoring header crc16
            }

            guard pos < limit else { return nil }
            let config = (operation: COMPRESSION_STREAM_DECODE, algorithm: COMPRESSION_ZLIB)
            return perform(config, source: ptr.advanced(by: pos), sourceSize: limit - pos)
        }

        guard let inflated = cresult else { return nil }
        guard ftr.isize == UInt32(truncatingIfNeeded: inflated.count) else { return nil }
        guard ftr.crc32 == inflated.crc32().checksum else { return nil }
        return inflated
    }

    /// Calculate the Adler32 checksum of the data.
    /// - returns: Adler32 checksum type. Can still be further advanced.
    func adler32() -> Adler32
    {
        var res = Adler32()
        res.advance(withChunk: self)
        return res
    }

    /// Calculate the Crc32 checksum of the data.
    /// - returns: Crc32 checksum type. Can still be further advanced.
    func crc32() -> Crc32
    {
        var res = Crc32()
        res.advance(withChunk: self)
        return res
    }
}
/// Advance the current checksum with a chunk of data. Designed to be called multiple times.
/// - parameter chunk: data to advance the checksum
public mutating func advance(withChunk chunk: Data)
{
    // An empty chunk cannot change the checksum. Returning early also keeps an
    // empty buffer away from this file's private `withUnsafeBytes` helper, which
    // force-unwraps the buffer's base address.
    guard !chunk.isEmpty else { return }

    if let fastCrc32 = Crc32.zLibCrc32 {
        // zlib's crc32() was found at load time, so use it
        checksum = chunk.withUnsafeBytes({ (ptr: UnsafePointer<UInt8>) -> UInt32 in
            return fastCrc32(checksum, ptr, UInt32(chunk.count))
        })
    }
    else {
        // table driven fallback
        checksum = slowCrc32(start: checksum, data: chunk)
    }
}
+ /// table source: http://web.mit.edu/freebsd/head/sys/libkern/crc32.c + private let lookUpTable: [UInt32] = [ + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 
/// Struct based type representing an Adler32 checksum.
public struct Adler32: CustomStringConvertible
{
    /// `adler32()` resolved from libz once, or nil when libz could not be loaded.
    private static let zLibAdler32: ZLibAdler32FuncPtr? = loadAdler32fromZLib()

    public init() {}

    // C convention function pointer type matching the signature of `libz::adler32`
    private typealias ZLibAdler32FuncPtr = @convention(c) (
        _ cks: UInt32,
        _ buf: UnsafePointer<UInt8>,
        _ len: UInt32
    ) -> UInt32

    /// Raw checksum. Updated after every call to `advance(withChunk:)`
    public var checksum: UInt32 = 1

    /// Advance the current checksum with a chunk of data. Designed to be called multiple times.
    /// - parameter chunk: data to advance the checksum
    public mutating func advance(withChunk chunk: Data)
    {
        // An empty chunk cannot change the checksum, and skipping it avoids
        // asking an empty buffer for its base address.
        guard !chunk.isEmpty else { return }

        if let fastAdler32 = Adler32.zLibAdler32 {
            checksum = chunk.withUnsafeBytes({ (ptr: UnsafePointer<UInt8>) -> UInt32 in
                return fastAdler32(checksum, ptr, UInt32(chunk.count))
            })
        }
        else {
            checksum = slowAdler32(start: checksum, data: chunk)
        }
    }

    /// Formatted checksum (8 lowercase hex digits).
    public var description: String
    {
        return String(format: "%08x", checksum)
    }

    /// Load `adler32()` from '/usr/lib/libz.dylib' if libz is installed.
    /// - returns: A function pointer to adler32() of zlib or nil if zlib can't be found
    private static func loadAdler32fromZLib() -> ZLibAdler32FuncPtr?
    {
        guard let libz = dlopen("/usr/lib/libz.dylib", RTLD_NOW) else { return nil }
        guard let fptr = dlsym(libz, "adler32") else { return nil }
        return unsafeBitCast(fptr, to: ZLibAdler32FuncPtr.self)
    }

    /// Rudimentary fallback implementation of the adler32 checksum. This is only a backup used
    /// when zlib can't be found under '/usr/lib/libz.dylib'.
    /// - returns: adler32 checksum (4 byte)
    private func slowAdler32(start: UInt32, data: Data) -> UInt32
    {
        // s1 is the low 16 bits of the running checksum, s2 the high 16 bits
        var s1: UInt32 = start & 0xffff
        var s2: UInt32 = (start >> 16) & 0xffff
        let prime: UInt32 = 65521

        for byte in data {
            s1 += UInt32(byte)
            if s1 >= prime { s1 = s1 % prime }
            s2 += s1
            if s2 >= prime { s2 = s2 % prime }
        }
        return (s2 << 16) | s1
    }
}
/// Pair of stream direction (encode/decode) and low level algorithm handed to `perform`.
fileprivate typealias Config = (operation: compression_stream_operation, algorithm: compression_algorithm)


/// Runs one libcompression stream over `source` and collects the output.
/// - parameter config: stream operation and algorithm
/// - parameter source: pointer to the first input byte
/// - parameter sourceSize: number of input bytes readable at `source`
/// - parameter preload: bytes the result starts with (e.g. a format header); the stream output is appended to it
/// - returns: `preload` followed by the stream output, or nil when decoding empty input,
///            when the stream can't be initialized, or when processing fails
fileprivate func perform(_ config: Config, source: UnsafePointer<UInt8>, sourceSize: Int, preload: Data = Data()) -> Data?
{
    // empty input is only accepted when encoding
    guard config.operation == COMPRESSION_STREAM_ENCODE || sourceSize > 0 else { return nil }

    let streamBase = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
    defer { streamBase.deallocate() }
    var stream = streamBase.pointee

    let status = compression_stream_init(&stream, config.operation, config.algorithm)
    guard status != COMPRESSION_STATUS_ERROR else { return nil }
    defer { compression_stream_destroy(&stream) }

    var result = preload
    var flags: Int32 = Int32(COMPRESSION_STREAM_FINALIZE.rawValue)
    let blockLimit = 64 * 1024
    var bufferSize = Swift.max(sourceSize, 64)

    if sourceSize > blockLimit {
        bufferSize = blockLimit
        if config.algorithm == COMPRESSION_LZFSE && config.operation != COMPRESSION_STREAM_ENCODE {
            // This fixes a bug in Apple's lzfse decompressor. It will sometimes fail randomly when the input gets
            // split into multiple chunks and the flag is not 0. Even though it should always work with FINALIZE...
            flags = 0
        }
    }

    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: bufferSize)
    defer { buffer.deallocate() }

    stream.dst_ptr  = buffer
    stream.dst_size = bufferSize
    stream.src_ptr  = source
    stream.src_size = sourceSize

    while true {
        switch compression_stream_process(&stream, flags) {
        case COMPRESSION_STATUS_OK:
            // OK is only accepted when the output buffer was filled completely:
            // flush it and hand the whole buffer back to the stream
            guard stream.dst_size == 0 else { return nil }
            result.append(buffer, count: stream.dst_ptr - buffer)
            stream.dst_ptr = buffer
            stream.dst_size = bufferSize

            if flags == 0 && stream.src_size == 0 { // part of the lzfse bugfix above
                flags = Int32(COMPRESSION_STREAM_FINALIZE.rawValue)
            }

        case COMPRESSION_STATUS_END:
            // stream finished: append whatever is left in the output buffer
            result.append(buffer, count: stream.dst_ptr - buffer)
            return result

        default:
            return nil
        }
    }
}
/// Integer totals collected for Build files, grouped into frontend and backend
/// counters, plus the start values of the frontend and backend.
class BuildStats {
    // Start values default to Int.max. `combine` leaves them alone,
    // `divideBy` divides them like every other field.
    var frontendStart = Int.max
    var backendStart = Int.max

    var totalExecuteCompiler = 0

    var totalFrontend = 0
    var totalSource = 0
    var totalInstantiateFunction = 0
    var totalInstantiateClass = 0
    var totalCodeGenFunction = 0
    var totalDebugType = 0

    var totalBackend = 0
    var totalOptimizer = 0
    var totalCodeGenPasses = 0
    var totalOptFunction = 0

    /// Adds every `total*` counter of `rhs` onto this instance.
    /// `frontendStart` and `backendStart` are not combined.
    func combine(_ rhs: BuildStats) {
        totalExecuteCompiler += rhs.totalExecuteCompiler

        totalFrontend += rhs.totalFrontend
        totalSource += rhs.totalSource
        totalInstantiateFunction += rhs.totalInstantiateFunction
        totalInstantiateClass += rhs.totalInstantiateClass
        totalCodeGenFunction += rhs.totalCodeGenFunction
        totalDebugType += rhs.totalDebugType

        totalBackend += rhs.totalBackend
        totalOptimizer += rhs.totalOptimizer
        totalCodeGenPasses += rhs.totalCodeGenPasses
        totalOptFunction += rhs.totalOptFunction
    }

    /// Divides every field (including the start values) by `s` using integer division.
    /// A divisor of 0 leaves the stats untouched instead of trapping.
    func divideBy(_ s: Int) {
        // Integer division by zero is a runtime trap in Swift
        guard s != 0 else { return }

        frontendStart /= s
        backendStart /= s

        totalExecuteCompiler /= s

        totalFrontend /= s
        totalSource /= s
        totalInstantiateFunction /= s
        totalInstantiateClass /= s
        totalCodeGenFunction /= s
        totalDebugType /= s

        totalBackend /= s
        totalOptimizer /= s
        totalCodeGenPasses /= s
        totalOptFunction /= s
    }
}
/// Resets the cached duration and the per-file-type caches so they can be
/// recomputed: build timings/stats for Build files, thread info for Memory files.
public func eraseCaches() {
    duration = 0.0

    switch fileType {
    case .Build:
        // for build fileType
        buildTimings.removeAll()
        buildFunctionTimings.reset()
        buildStats = nil
    case .Memory:
        // for memory fileType
        threadInfo.removeAll()
    default:
        // other file types have no extra caches
        break
    }
}
/// Formats the file's duration with 3 decimals and a unit suffix
/// ("m" for Memory files, "s" otherwise). Returns "" when the duration is 0.
func generateDuration(file: File) -> String {
    if file.duration == 0.0 { return "" }

    let unitText = file.fileType == .Memory ? "m" : "s"
    return "\(double:file.duration, decimals:3)\(unitText)"
}

/// Formats the summed duration of `buildFiles` as "/ <total>s ", followed by the
/// share of `file` in that total when it is at least 1 percent.
/// Returns "" when there are no build files or the total is 0.
func generateTotalDuration(_ file: File, _ buildFiles: [File]) -> String {
    if buildFiles.isEmpty { return "" }

    // add up duration of all files related to selection
    var totalDuration = 0.0
    for buildFile in buildFiles {
        totalDuration += buildFile.duration
    }

    if totalDuration == 0.0 { return "" }
    var text = "/ \(double:totalDuration, decimals:3)s "

    // only show percent if high enough
    let totalPercent = file.duration * 100.0 / totalDuration
    if totalPercent >= 1 {
        text += "\(double:totalPercent, decimals:0)% "
    }
    return text
}

/// Builds the navigation title for the selected file: its duration, the total
/// duration for Build files, the file name, and the archive name if it has one.
/// Returns "" when nothing is selected.
@MainActor
func generateNavigationTitle(_ sel: String?, _ files: [File]) -> String {
    guard let sel = sel else { return "" }

    let f = lookupFile(selection: sel)
    var text = generateDuration(file: f) + " "

    // total the durations matching the selection
    if f.fileType == .Build {
        let buildFiles = findFilesForBuildTimings(files: files, selection: sel)
        text += generateTotalDuration(f, buildFiles)
    }

    // add the shortened filename
    text += f.name

    // add the archive name
    if let fileArchive = f.archive {
        text += " in (" + fileArchive.name + ")"
    }

    return text
}

//-------------
// Note: if a file is deleted which happens often with builds,
// then want to identify that and update the list. At least
// indicate the item is gone, and await its return.

// Holds supported files dropped or opened from Finder, reload reparses this
@MainActor var droppedFileCache : [URL] = []

// Flattened list of supported files from folders and archives
@MainActor var fileCache : [URL:File] = [:]

/// Returns the cached `File` for `url` when it is still current, otherwise
/// creates a fresh `File`, stores it in `fileCache` and returns that.
@MainActor
func updateFile(url: URL) -> File {
    let file = File(url:url)

    // This preserves the duration previously parsed and stored

    if let fileOld = fileCache[file.url] {
        // nil modStamp means file and/or dir went away, so return fileOld.
        // Compare the optionals directly: the cached entry may itself have no
        // modStamp (it was created while the file was missing), and
        // force-unwrapping it would crash once the file comes back.
        if file.modStamp == nil ||
            file.modStamp == fileOld.modStamp {
            return fileOld
        }
    }

    // This wipes the duration, so it can be recomputed
    // TODO: may want to check crc32 if present before wiping all data

    if file.archive == nil {
        file.eraseFileContent()
        file.eraseCaches()
    }

    fileCache[file.url] = file

    return file
}

/// Returns the cached `File` for `url`, or a new uncached `File` when there is none.
@MainActor
func lookupFile(url: URL) -> File {
    let file = File(url:url)
    if let fileOld = fileCache[file.url] {
        return fileOld
    }
    return file
}

/// Looks up a file by its selection string, which is parsed with `URL(string:)`.
@MainActor
func lookupFile(selection: String) -> File {
    // NOTE(review): force-unwrap traps if `selection` is not a parseable URL string;
    // presumably callers always pass a `File.id` (url.absoluteString) — confirm.
    return lookupFile(url:URL(string:selection)!)
}
+ } + catch { + log.error(error.localizedDescription) + } + } + } + + public static func == (lhs: Archive, rhs: Archive) -> Bool { + return lhs.id == rhs.id + } + public static func < (lhs: Archive, rhs: Archive) -> Bool { + return lhs.id < rhs.id + } + + public func isReloadNeeded() -> Bool { + return modStamp != loadStamp + } +} + +// cache of archives to avoid creating these each time +@MainActor var archiveCache: [URL:Archive] = [:] + +@MainActor +func lookupArchive(_ url: URL) -> Archive { + let archive = Archive(url) + + // This preseves the content in the archive, and across all files with held content + if let archiveOld = archiveCache[archive.url] { + if archive.modStamp == nil || // means file and/or dir went away, so return fileOld + archive.modStamp! == archiveOld.modStamp! { + return archiveOld + } + + archive.open() + + // replace any archives with this one + for file in fileCache.values { + if file.archive == archiveOld { + + // Only need to release caches if hash differs + let filename = file.url.absoluteString + + let oldEntry = archiveOld.archive!.zipEntry(byName: filename) + let newEntry = archive.archive!.zipEntry(byName: filename) + + let isNewEntryMissing = String(cString:newEntry.filename) == "" + + if isNewEntryMissing { + // TODO: handle new archive missing the file + // need to release file + } + else { + // update the archive + file.archive = archive + + // convert zip modStamp to Data object (only valid to seconds) + file.modStamp = Date(timeIntervalSince1970: Double(newEntry.modificationDate)) + } + + if !isNewEntryMissing && (oldEntry.crc32 == newEntry.crc32) { + + // erase fileContent since it may alias mmap going away + file.eraseFileContent() + + // keep any caches + } + else { + // erase fileContent + file.loadStamp = nil + file.fileContent = nil + + file.eraseFileContent() + file.eraseCaches() + } + } + } + } + else { + archive.open() + } + + // Files will need to reopen content, but only if the hash is different. 
+ archiveCache[archive.url] = archive + + return archive +} + +//------------- + +func loadFileContent(_ file: File) -> Data { + if file.fileContent != nil { + return file.fileContent! + } + + if file.archive != nil { + // this will point to a section of an mmaped zip archive + // but it may have to decompress content to a Data object + file.fileContent = file.archive!.archive!.extract(file.url.absoluteString) + } + else { + // This uses mmap if safe. Does not count towars memory totals, since can be paged out + do { + file.fileContent = try Data(contentsOf: file.url, options: [.mappedIfSafe]) + } + catch { + log.error(error.localizedDescription) + } + } + + return file.fileContent! +} + +func isSupportedFilename(_ url: URL) -> Bool { + let ext = url.pathExtension + + // what ext does trace.zip, or trace.gz come in as ? + // should this limit compressed files to the names supported below + + // Apple and Microsoft store resource fork data in "._Filename.trace" files + // so need to ignore these in the lists. 
These don't occur from CLI zip, + // only from using Finder "Compress" + if url.lastPathComponent.starts(with: "._") { + return false + } + + if ext == "gz" { + return true + } + if ext == "zip" { + return true + } + + // clang build files use generic .json ext + if ext == "json" || ext == "buildtrace" { + let filename = url.lastPathComponent + + // filter out some by name, so don't have to open files + if filename == "build-description.json" || + filename == "build-request.json" || + filename == "manifest.json" || + filename.hasSuffix("diagnostic-filename-map.json") || + filename.hasSuffix(".abi.json") || + filename.hasSuffix("-OutputFileMap.json") || + filename.hasSuffix("_const_extract_protocols.json") + { + return false + } + return true + } + + // profiling + if ext == "perftrace" || ext == "trace" { + return true + } + + // memory + if ext == "memtrace" { + return true + } + + return false +} + +@MainActor +func listFilesFromArchive(_ urlArchive: URL) -> [File] { + + let archive = lookupArchive(urlArchive) + var files: [File] = [] + + let arc = archive.archive! + for i in 0.. [File] +{ + var files: [File] = [] + + for url in urls { + // now filter a list of all the files under the dir + if url.hasDirectoryPath { + // list out all matching files + // also these [.skipsHiddenFiles, .skipsSubdirectoryDescendants] + + // recurse into directory + let directoryEnumerator = FileManager.default.enumerator( + at: url, + includingPropertiesForKeys: nil + // options: [.skipsHiddenFiles] + ) + + while let fileURL = directoryEnumerator?.nextObject() as? 
URL { + if fileURL.hasDirectoryPath { continue } + + let isSupported = isSupportedFilename(fileURL) + if isSupported { + let isArchive = File.filenameToContainerType(fileURL) == .Archive + if isArchive { + files += listFilesFromArchive(fileURL) + } + else { + files.append(updateFile(url:fileURL)); + } + } + } + } + else if url.isFileURL { + let isSupported = isSupportedFilename(url) + if isSupported { + let isArchive = File.filenameToContainerType(url) == .Archive + if isArchive { + files += listFilesFromArchive(url) + } + else { + files.append(updateFile(url:url)) + } + } + } + } + + return files +} diff --git a/kram-profile/kram-profile/FileList.swift b/kram-profile/kram-profile/FileList.swift new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/kram-profile/kram-profile/FileList.swift @@ -0,0 +1 @@ + diff --git a/kram-profile/kram-profile/Info.plist b/kram-profile/kram-profile/Info.plist new file mode 100644 index 00000000..d8f7160e --- /dev/null +++ b/kram-profile/kram-profile/Info.plist @@ -0,0 +1,151 @@ + + + + + CFBundleDocumentTypes + + + CFBundleTypeName + memtrace + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + memtrace + + NSDocumentClass + KramDocument + + + CFBundleTypeName + trace + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + trace + + NSDocumentClass + KramDocument + + + CFBundleTypeName + json + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + public.json + + NSDocumentClass + KramDocument + + + CFBundleTypeName + gz + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + org.gnu.gnu-zip-archive + + NSDocumentClass + KramDocument + + + CFBundleTypeName + perftrace + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + perftrace + + NSDocumentClass + KramDocument + + + CFBundleTypeName + buildtrace + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + buildtrace + + 
NSDocumentClass + KramDocument + + + CFBundleTypeName + zip + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + public.zip-archive + + NSDocumentClass + KramDocument + + + UTImportedTypeDeclarations + + + UTTypeDescription + + UTTypeIcons + + UTTypeIdentifier + + UTTypeTagSpecification + + public.filename-extension + + + + + UTTypeDescription + + UTTypeIcons + + UTTypeIdentifier + + UTTypeTagSpecification + + public.filename-extension + + + + + UTTypeDescription + + UTTypeIcons + + UTTypeIdentifier + + UTTypeTagSpecification + + public.filename-extension + + \ + + + + + + diff --git a/kram-profile/kram-profile/Keycode.swift b/kram-profile/kram-profile/Keycode.swift new file mode 100644 index 00000000..1c4bcb1c --- /dev/null +++ b/kram-profile/kram-profile/Keycode.swift @@ -0,0 +1,134 @@ +// kram - Copyright 2020-2024 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +import Foundation + +// https://gist.github.com/swillits/df648e87016772c7f7e5dbed2b345066 +struct Keycode { + + // Layout-independent Keys + // eg.These key codes are always the same key on all layouts. 
+ static let returnKey : UInt16 = 0x24 + static let enter : UInt16 = 0x4C + static let tab : UInt16 = 0x30 + static let space : UInt16 = 0x31 + static let delete : UInt16 = 0x33 + static let escape : UInt16 = 0x35 + static let command : UInt16 = 0x37 + static let shift : UInt16 = 0x38 + static let capsLock : UInt16 = 0x39 + static let option : UInt16 = 0x3A + static let control : UInt16 = 0x3B + static let rightCommand : UInt16 = 0x36 + static let rightShift : UInt16 = 0x3C + static let rightOption : UInt16 = 0x3D + static let rightControl : UInt16 = 0x3E + static let leftArrow : UInt16 = 0x7B + static let rightArrow : UInt16 = 0x7C + static let downArrow : UInt16 = 0x7D + static let upArrow : UInt16 = 0x7E + static let volumeUp : UInt16 = 0x48 + static let volumeDown : UInt16 = 0x49 + static let mute : UInt16 = 0x4A + static let help : UInt16 = 0x72 + static let home : UInt16 = 0x73 + static let pageUp : UInt16 = 0x74 + static let forwardDelete : UInt16 = 0x75 + static let end : UInt16 = 0x77 + static let pageDown : UInt16 = 0x79 + static let function : UInt16 = 0x3F + static let f1 : UInt16 = 0x7A + static let f2 : UInt16 = 0x78 + static let f4 : UInt16 = 0x76 + static let f5 : UInt16 = 0x60 + static let f6 : UInt16 = 0x61 + static let f7 : UInt16 = 0x62 + static let f3 : UInt16 = 0x63 + static let f8 : UInt16 = 0x64 + static let f9 : UInt16 = 0x65 + static let f10 : UInt16 = 0x6D + static let f11 : UInt16 = 0x67 + static let f12 : UInt16 = 0x6F + static let f13 : UInt16 = 0x69 + static let f14 : UInt16 = 0x6B + static let f15 : UInt16 = 0x71 + static let f16 : UInt16 = 0x6A + static let f17 : UInt16 = 0x40 + static let f18 : UInt16 = 0x4F + static let f19 : UInt16 = 0x50 + static let f20 : UInt16 = 0x5A + + // US-ANSI Keyboard Positions + // eg. These key codes are for the physical key (in any keyboard layout) + // at the location of the named key in the US-ANSI layout. 
+ static let a : UInt16 = 0x00 + static let b : UInt16 = 0x0B + static let c : UInt16 = 0x08 + static let d : UInt16 = 0x02 + static let e : UInt16 = 0x0E + static let f : UInt16 = 0x03 + static let g : UInt16 = 0x05 + static let h : UInt16 = 0x04 + static let i : UInt16 = 0x22 + static let j : UInt16 = 0x26 + static let k : UInt16 = 0x28 + static let l : UInt16 = 0x25 + static let m : UInt16 = 0x2E + static let n : UInt16 = 0x2D + static let o : UInt16 = 0x1F + static let p : UInt16 = 0x23 + static let q : UInt16 = 0x0C + static let r : UInt16 = 0x0F + static let s : UInt16 = 0x01 + static let t : UInt16 = 0x11 + static let u : UInt16 = 0x20 + static let v : UInt16 = 0x09 + static let w : UInt16 = 0x0D + static let x : UInt16 = 0x07 + static let y : UInt16 = 0x10 + static let z : UInt16 = 0x06 + + static let zero : UInt16 = 0x1D + static let one : UInt16 = 0x12 + static let two : UInt16 = 0x13 + static let three : UInt16 = 0x14 + static let four : UInt16 = 0x15 + static let five : UInt16 = 0x17 + static let six : UInt16 = 0x16 + static let seven : UInt16 = 0x1A + static let eight : UInt16 = 0x1C + static let nine : UInt16 = 0x19 + + static let equals : UInt16 = 0x18 + static let minus : UInt16 = 0x1B + static let semicolon : UInt16 = 0x29 + static let apostrophe : UInt16 = 0x27 + static let comma : UInt16 = 0x2B + static let period : UInt16 = 0x2F + static let forwardSlash : UInt16 = 0x2C + static let backslash : UInt16 = 0x2A + static let grave : UInt16 = 0x32 + static let leftBracket : UInt16 = 0x21 + static let rightBracket : UInt16 = 0x1E + + static let keypadDecimal : UInt16 = 0x41 + static let keypadMultiply : UInt16 = 0x43 + static let keypadPlus : UInt16 = 0x45 + static let keypadClear : UInt16 = 0x47 + static let keypadDivide : UInt16 = 0x4B + static let keypadEnter : UInt16 = 0x4C + static let keypadMinus : UInt16 = 0x4E + static let keypadEquals : UInt16 = 0x51 + static let keypad0 : UInt16 = 0x52 + static let keypad1 : UInt16 = 0x53 + static let 
keypad2 : UInt16 = 0x54 + static let keypad3 : UInt16 = 0x55 + static let keypad4 : UInt16 = 0x56 + static let keypad5 : UInt16 = 0x57 + static let keypad6 : UInt16 = 0x58 + static let keypad7 : UInt16 = 0x59 + static let keypad8 : UInt16 = 0x5B + static let keypad9 : UInt16 = 0x5C +} + diff --git a/kram-profile/kram-profile/Log.swift b/kram-profile/kram-profile/Log.swift new file mode 100644 index 00000000..6c278c9b --- /dev/null +++ b/kram-profile/kram-profile/Log.swift @@ -0,0 +1,301 @@ +// kram - Copyright 2020-2024 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +import Foundation + +import os.log +import Darwin + +/* + Can strip away logs or add adititional data to messages more easily + + To use, in the Swift source file specify the following variable: + private let log = Log() -> Log(#file) <- paths are stripped + or + private let log = Log("File/Groupname") + + log.debug("debug text") + log.info("info text") + log.error("error text") + + A few boolean flags control the output: + configure(prints, timestamps, stacktraces) + + For more expensive functions, use the isInfoEnabled() call to skip blocks. 
+ if log.isInfoEnabled() { + log.info("ComputeWorldPopulation:", countPeopleInAllCountries()) + } + + Output: + prints = true (via print) + 14:40:21.185 D[GameSceneViewController] debug text + 14:40:21.201 I[GameSceneViewController] info text + 14:40:21.321 E[GameSceneViewController] error text + at GameSceneViewController:75@init(_:file:) + on thread:queue + + or + 0.001s D[GameSceneViewController] debug text + 0.002s I[GameSceneViewController] info text + 0.003s E[GameSceneViewController] error text + at GameSceneViewController:75@init(_:file:) + on thread:queue + + prints = false (via os_log) + 2018-04-11 14:59:07.122127-0700 SwiftShot[581:21310] [GameSceneViewController] debug text + 2018-04-11 14:59:07.122166-0700 SwiftShot[581:21310] [GameSceneViewController] info text + 2018-04-11 14:59:07.122186-0700 SwiftShot[581:21310] [GameSceneViewController] error text +*/ + +struct LogState { + // verbose: Whether os_log or print is used to report logs. + let prints = false + // stacktrace: Whether stack trace is logged on errors. + let stacktraces = false + // timestamp: Show timestamps on all entries when printing statements. + let timestamps = false + // absoluteTimestamps: Show relative or absolute timestampes. + let absoluteTimestamps = true + + // Store data for timestamps. + let timestampToSeconds = initTimestampToSeconds() + let timestampStart: Double + let timestampStartDate = Date() + let timestampFormatter = initTimestampFormatter() + + // This can be filtered from command line arguments. + let subsystem = Bundle.main.bundleIdentifier! + + init() { + timestampStart = LogState.timestampStartTime(timestampToSeconds) + } + private static func initTimestampFormatter() -> DateFormatter { + let formatter = DateFormatter() + formatter.locale = Locale.current + formatter.setLocalizedDateFormatFromTemplate("HH:mm:ss.SSS") // ms resolution + return formatter + } + + private static func initTimestampToSeconds() -> Double { + // Cache the conversion. 
Note that clock rate can change with cpu throttling. + // These are high-resolution timestamps taken from the system timer. + var info = mach_timebase_info(numer: 0, denom: 0) + mach_timebase_info(&info) + let numer = Double(info.numer) + let denom = Double(info.denom) + return 1e-9 * (numer / denom) // inverse so we can multiply + } + + static func timestampStartTime(_ timestampToSeconds: Double) -> Double { + let timestamp = Double(mach_absolute_time()) + let time = timestamp * timestampToSeconds + return time + } + + // need timestamps in other parts of the app + func timestamp() -> Double { + let timestamp = Double(mach_absolute_time()) + let time = timestamp * timestampToSeconds + return time + } +} + +let logState = LogState() + +class Log: @unchecked Sendable { + + // Custom logging group - usually based on source filename. + // This has a very limited output, but does go to console + private var log: OSLog + + // Cache the filename for reporting it on errors. + private var file: String + // All logs go to this category for filtering. + private var category: String + + init(_ category: String = #file, file: String = #file) { + // Strip the path, but allow hierachical category f.e. "Group/Subgroup" wihtout .swift. + self.category = category + if category.hasSuffix(".swift") { + self.category = Log.stripFilePathAndExtension(category) + } + + // Compute once for use in logs. 
+ self.file = Log.stripFilePathAndExtension(file) + + self.log = OSLog(subsystem: logState.subsystem, category: self.category) + } + + // Test whether messages are logged for the given levels + func isWarnEnabled() -> Bool { + return log.isEnabled(type: .default) + } + func isInfoEnabled() -> Bool { + return log.isEnabled(type: .info) + } + func isDebugEnabled() -> Bool { + #if DEBUG + return log.isEnabled(type: .debug) + #else + return false + #endif + } + + private func logToOSLog(_ text: String, _ type: OSLogType) { + // TODO: this needs to split the string up, since os_log limits length to + // some paltry 1023 chars. + os_log("%s", log: log, type: type, text) + } + + func error(_ message: @autoclosure () -> String, _ function: String = #function, _ line: Int = #line) { + let text = formatMessage(message(), .error, function, line) + if logState.prints { + print(text) + } else { + logToOSLog(text, .error) + } + } + + // os_log left out warnings, so reuse default type for that + func warn(_ message: @autoclosure () -> String, _ function: String = #function, _ line: Int = #line) { + let text = formatMessage(message(), .default, function, line) + if logState.prints { + print(text) + } else { + logToOSLog(text, .default) // this doesn't get colored yellow like a warning + } + } + + func info(_ message: @autoclosure () -> String) { + let text = formatMessage(message(), .info) + if logState.prints { + print(text) + } else { + logToOSLog(text, .info) + } + } + + func debug(_ message: @autoclosure () -> String) { + // debug logs are stripped from release builds + #if DEBUG + let text = formatMessage(message(), .debug) + if logState.prints { + print(text) + } else { + logToOSLog(text, .debug) + } + #endif + } + + private func formatLevel(_ level: OSLogType) -> String { + switch level { + case .debug: return "" + case .info: return "" + case .default: return "⚠️" + case .error: return "🛑" + default: return "" + } + } + + // Customize this printing as desired. 
+ private func formatMessage(_ message: String, _ level: OSLogType, _ function: String = "", _ line: Int = 0) -> String { + var text = "" + + let levelText = formatLevel(level) + + if logState.prints { + let timestamp = Log.formatTimestamp() + + // These messages never go out to the system console, just the debugger. + switch level { + case .debug: + text += "\(timestamp)\(levelText)D[\(category)] \(message)" + case .info: + text += "\(timestamp)\(levelText)I[\(category)] \(message)" + case .default: // not a keyword + text += "\(timestamp)\(levelText)W[\(category)] \(message)" + text += Log.formatLocation(file, line, function) + case .error: + text += "\(timestamp)\(levelText)E[\(category)] \(message)\n" + text += Log.formatLocation(file, line, function) + default: + text += message + } + } else { + // Consider reporting the data above to os_log. + // os_log reports data, time, app, threadId and message to stderr. + text += levelText + text += message + + // os_log can't show correct file/line, since it uses addrReturnAddress - ugh + switch level { + case .default: fallthrough + case .error: + text += Log.formatLocation(file, line, function) + default: + break + } + } + + if logState.stacktraces && (level == .error) { + text += "\n" + + // Improve this - these are mangled symbols without file/line of where + Thread.callStackSymbols.forEach { text += $0 + "\n" } + } + + return text + } + + // location support + private static func formatLocation(_ file: String, _ line: Int, _ function: String) -> String { + var text = "" + let threadName = Thread.current.name ?? "" + var queueName = OperationQueue.current?.name ?? 
"" + if !queueName.isEmpty { + queueName = ":" + queueName + } + + text += "\n at \(file):\(line)@\(function)" + if !threadName.isEmpty || !queueName.isEmpty { + text += "\n on \(threadName)\(queueName)" + } + return text + } + + private static func stripFilePathAndExtension(_ path: String) -> String { + let str = path as NSString + return (str.deletingPathExtension as NSString).lastPathComponent + } + + // timestamp support + + + private static func timeFromStart() -> Double { + return max(0.0, Log.timestamp() - logState.timestampStart) + } + + private static func timeAbsolute() -> String { + let timestamp = Log.timeFromStart() + let date = Date(timeInterval: timestamp, since: logState.timestampStartDate) + return logState.timestampFormatter.string(from: date) + } + + private static func formatTimestamp() -> String { + var timestamp = "" + if logState.timestamps { + if logState.absoluteTimestamps { + timestamp = Log.timeAbsolute() + " " + } else { + timestamp = String(format: "%.3fs ", Log.timeFromStart()) + } + } + return timestamp + } + + static func timestamp() -> Double { + return logState.timestamp() + } +} + diff --git a/kram-profile/kram-profile/Preview Content/Preview Assets.xcassets/Contents.json b/kram-profile/kram-profile/Preview Content/Preview Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/kram-profile/kram-profile/Preview Content/Preview Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/kram-profile/kram-profile/kram_profile.entitlements b/kram-profile/kram-profile/kram_profile.entitlements new file mode 100644 index 00000000..e00d8415 --- /dev/null +++ b/kram-profile/kram-profile/kram_profile.entitlements @@ -0,0 +1,12 @@ + + + + + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + + com.apple.security.network.client + + + diff --git a/kram-profile/kram-profile/kram_profileApp.swift 
b/kram-profile/kram-profile/kram_profileApp.swift new file mode 100644 index 00000000..7e0e4e0f --- /dev/null +++ b/kram-profile/kram-profile/kram_profileApp.swift @@ -0,0 +1,2800 @@ +// kram - Copyright 2020-2024 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +import SwiftUI +import WebKit +import UniformTypeIdentifiers + +// https://github.com/gualtierofrigerio/WkWebViewJavascript/blob/master/WkWebViewJavascript/WebViewHandler.swift + +// https://levelup.gitconnected.com/how-to-use-wkwebview-on-mac-with-swiftui-10266989ed11 +// Signing & Capabilites set App Sandbox (allow outgoing connections) + +// This is really just a wrapper to turn WKWebView into something SwiftUI +// can interop with. SwiftUI has not browser widget. + +// DONE: add bg list color depending on sort +// DONE: fn+F doesn't honor fullscreen +// DONE: Perfetto can only read .gz files, and not .zip files. +// But could decode zip files here, and send over gz compressed. +// Would need to idenfity zip archive > 1 file vs. zip single file. +// DONE: add gz compression to all file data. Use libCompression +// but it only has zlib compression. Use DataCompression which +// messages zlib deflate to gzip. +// DONE: if list hidden, then can't advance +// DONE: be nice to focus the search input on cmd+F just to make me happy. (using cmd+S) +// Browser goes to its own search which doesn’t help. + +// TODO: Hitting T to sort, changes the selection. That shouldn't happen. + +// Memory traces +// TODO: sort thread by size - repack the memory graph largest to smallest by reordering each track +// then can focus on the bigger values. +// TODO: Sort by name and convert to count - can then see common counts +// so keep the json loaded in Swift. Can json be cloned and modded? 
+// TODO: option to coalesce to count and name with sort + +// Build traces +// DONE: build hierarchy and self times +// DONE: background process to compute buildTimings across all files +// DONE: add a total time, and show that in the nav panel, and % of total +// then know for a summary what the total time spent compiling is. +// DONE: parse instantiateFunction totals from build traces, what CBA is doing +// avoid InstantiateClass since it's a child +// DONE: parse optFunction totals from build traces, what CBA is doing +// TODO: duration may not be updating properly when doing Reload on loose files, but thought this was fixed +// TODO: add children of each archive, so those show in the list and can collapse + +// Perf traces +// TODO: build per-thread hierarchy and self times + +// TODO: track kram-profile memory use, jettison Data that isn't needed after have built up timings. +// can re-decompress from zip mmap. + +// DONE: import zip +// DONE: add/update recent document list (need to hold onto dropped/opened folder) +// DONE: can't mmap web link, but can load zip off server with timings + +// DONE: drop anything in the build report < 1% in one sub-track +// Could display on sub-track but would have to fit under the parent timing (but should). +// Could stop when remaining total child time < parent. +// This stops the long tail. So just total the results, and start setting ts once reached. + +// TODO: run cba on files, mmap and decompress each can use incremental mode? +// TODO: save/load the duration and modstamps for File at quit, and any other metadata (totals per section) +// TODO: add jump to source/header, but path would need to be correct (sandbox block?) + +// TODO: look into fast crc32 ops on M1 +// can use this on loose files as a hash, and also compare to zip files +// already have a crc32 in the zip lib +// https://dougallj.wordpress.com/2022/05/22/faster-crc32-on-the-apple-m1/ + +// Build traces +// DONE: OptFunction needs to be demangled. 
All backend strings are still mangled. +// Don’t need the library CBA uses; just use abi::__cxa_demangle() on macOS. +// https://github.com/llvm/llvm-project/issues/459 + +// TODO: across all files, many of the strings are the same. Could replace all strings +// with an index, compress, and zip archive with the index table. build, perf, mem strings +// filenames are also redundant. Just need to use @[F|S|B]num, and then do lookup before CBA final report. +// table if global would need to use same index across all files. +// Can rebuild references on JS side to send less data. JS can then alias strings ftw. +// Just add special ph type that is ignored by web to specify the alias. +// TODO: work on sending a more efficient form. Could use Perfetto SDK to write to protobuf. The perfetto json format is overly verbose. Need some thread and scope strings, some open/close timings that reference a scope string and thread. +// TODO: add compressed format, build up Perfetto json or binary from this +// may need one for mmap, other for super compact deltas +// can still alias strings from mmap +// +// have various duration forms. +// could have ascii form of below. +// flags to identify optional param +// 4B magic +// n len nid name // nid is repacked 0..table +// t len tid name // tid is repacked to 0... table +// s len sid name // symbols +// i tid tmin tmax count // have this written at end of file for each thread +// f fid nid line nid (file line func) +// r rid len sid sid sid sid +// s opt(tid nid fid color) dur opt(time), opt means uses prior tid/nid/color of that tid +// (defaults if none). May need to buffer per thread, top of buffer explicit +// then merge with the other buffers, compare last tid data written. +// s = 1 + 3 + 3 + 8 + 8 + 4 = 29B +// smin = 1 + 8B +// need a way to tag file/line, and count into the dump + +// timings can be delta encoded, but with ts/dur a parent scope writes after. +// they aren't ordered by startTime. Also missing any unclosed scoping. 
+// compared to -t +t sequences. Note -0 = 0, so 0 is an open scope (check <=0 ) + +// 4-bit, 12-bit, 16-bit, variable, pad to 4B + +// DONE: recent documents list doesn't survive relaunch, but only when app is rebuilt +// but still kind of annoying for development + +// DONE: have a way to reload dropped folder +// DONE: track parent archives, folder, and loose drop files +// and when reload is hit, then reload all of that, rebuild the list +// and then reload the selected file +// DONE: zipHelper to deal with archives, can use Swift Data to mmap content if needed +// mmap the zip, list out the files and locations, and then inflate the content somewhere +// only then can data be handed off to Perfetto or CBA. And CBA needs all files. +// Maybe extend CBA to read a zip file. Can just use ZipHelper. + +// TODO: use refreshable on the list to allow await on async tasks +// so could refresh the durations off that. +// TODO: passing children field to the List ctor creates a hierarchical list. +// so could have dropped file, archive, folder in the list to collapse the view +// Each file array would be a child. Parent would be clearer then. + +// TODO: fix duration update modding the List item and nav title after it updates +// Currently select list, it updates, then duration is calculated. +// there is an objectWillChange.send(). Could maybe send that from the File +// https://www.hackingwithswift.com/forums/swiftui/update-content-of-existing-row-of-list/3029 + +// WKWebView +// TODO: can't block drop onto the WKWebView +// TODO: can't override "delete" or "shift+Delete" key doing a back/fwd in the WKWebView history +// Perfetto warns that content will be lost + +// Perfetto Bugs +// TODO: fix the js wait, even with listener, there's still a race +// maybe there's some ServiceWorker still loading the previous json? +// Perfetto is using a ServiceWorker, Safari uses those now, and ping/pong unaware. 
+// TODO: switch to Perfetto dark mode + +// Multi-window +// TODO: support WindowGroup and multiwindow, each needs own webView, problem +// is that onOpenURL opens a new window always. +// TODO: look in to hosting Remotery web and/or Tracy server, Tracy is imgui +// but these don't review traces offline, and are live profilers +// TODO: add Metal capture and imgui backend to Tracy +// TODO: add Metal capture to Remotery (this isn't a flamegraph) + +// TODO: switch font to Inter, bundle that with the app? +// .environment(\.font, Font.custom("CustomFont", size: 14)) +// TODO: for perf traces, compute duration between frame +// markers. Multiple frames in a file, then show max frame duration +// instead of the entire file. +// TODO: no simple scrollTo, since this is all React style +// There is a ScrollViewReader, but value only usable within. UITableView has this. +// TODO: track when files change or get deleted, update the list item then +// can disable list items that are deleted in case they return (can still pick if current) +// https://developer.apple.com/documentation/coreservices/file_system_events?language=objc +// TODO: here's how to sign builds for GitHub Actions +// https://docs.github.com/en/actions/deployment/deploying-xcode-applications/installing-an-apple-certificate-on-macos-runners-for-xcode-development + +// See here about Navigation API +// https://developer.apple.com/videos/play/wwdc2022/10054/ + +// This is how open_trace_in_ui.py tells the browser to open a file +// http://ui.perfetto.dev/#!/?url=http://127.0.0.1:9001/{fname} +// Then the http server serves up that file to the browser and sets Allow-Origin header. 
+ +// https://developer.mozilla.org/en-US/docs/Glossary/Base64#the_unicode_problem +//https://stackoverflow.com/questions/30106476/using-javascripts-atob-to-decode-base64-doesnt-properly-decode-utf-8-strings +// https://gist.github.com/chromy/170c11ce30d9084957d7f3aa065e89f8 +// need to post this JavaScript + +// https://stackoverflow.com/questions/32113933/how-do-i-pass-a-swift-object-to-javascript-wkwebview-swift + +// https://stackoverflow.com/questions/37820666/how-can-i-send-data-from-swift-to-javascript-and-display-them-in-my-web-view + +// Dealing with available and Swift and SwiftUI. Ugh. +// https://www.swiftyplace.com/blog/swift-available#:~:text=Conditional%20Handling%20with%20if%20%23available&text=If%20the%20device%20is%20running%20an%20earlier%20version%20of%20iOS,a%20fallback%20for%20earlier%20versions. + +// Video about Combine +// https://www.youtube.com/watch?v=TshpcKZmma8 + +// Description of CoreData/SwiftData and how it works +// https://davedelong.com/blog/2021/04/03/core-data-and-swiftui/ +// +// List sort picker +// https://xavier7t.com/swiftui-list-with-sort-options +// +// https://stackoverflow.com/questions/70652964/how-to-search-a-table-using-swiftui-on-macos +// +// https://developer.apple.com/documentation/swiftui/adding-a-search-interface-to-your-app +// can filter list items off this + +class FileSearcher: ObservableObject { + @Published var searchIsActive = false + @Published var searchText = "" + + var files: [File] = [] + + // I made this plublished so sort would also cause update to filesSearched + // but the search field keeps re-focusing + // @Published var filesSorted: [File] = [] + var filesSorted: [File] = [] + + // duplicate code, but init() doesn't have self defined + func updateFilesSorted(_ sortByDuration: Bool = false) { + // may not want to sort everytime, or the list will change as duration is updated + // really want to do this off a button, and then set files to that + let sortedResults = files.sorted { + if 
!sortByDuration { + return $0.id < $1.id + } + else { + // keep the groupings, just sort the duration within + if $0.parentFolders != $1.parentFolders { + return $0.parentFolders < $1.parentFolders + } + if $0.duration == $1.duration { + return $0.id < $1.id + } + // TODO: may want to also search by last + return $0.duration > $1.duration + } + } + + filesSorted = sortedResults + + // TODO: important or filesSearched isn't updated in the list when + // the sort occurs. This is causing filter to re-focus since + // it thinks the searchText which is also Published changed. + objectWillChange.send() + } + + var filesSearched: [File] { + + if searchText.isEmpty || filesSorted.count <= 1 { + return filesSorted + } + else if searchText.count == 1 { + let lowercaseSearchText = searchText.lowercased() + let uppercaseSearchText = searchText.uppercased() + + return filesSorted.filter { + $0.name.starts(with: uppercaseSearchText) || + $0.name.starts(with: lowercaseSearchText) + } + } + else { + // is search text multistring? + return filesSorted.filter { + $0.name.localizedCaseInsensitiveContains(searchText) + } + } + } +} + +// https://stackoverflow.com/questions/24074479/how-to-create-a-string-with-format +extension String.StringInterpolation { + + /// Quick formatting for *floating point* values. 
+ mutating func appendInterpolation(float: Float, decimals: UInt = 2, zero: Bool = true) { + let floatDescription = String(format:"%.\(decimals)f%", float) +// if stripTrailingZeros && decimals > 0 { +// // https://stackoverflow.com/questions/29560743/swift-remove-trailing-zeros-from-double +// floatDescription = floatDescription.replacingOccurrences(of: "^([\\d,]+)$|^([\\d,]+)\\.0*$|^([\\d,]+\\.[0-9]*?)0*$", with: "$1$2$3", options: .regularExpression) +// } + appendLiteral(floatDescription) + } + + mutating func appendInterpolation(double: Double, decimals: UInt = 2, zero: Bool = true) { + let floatDescription = String(format:"%.\(decimals)f%", double) +// if stripTrailingZeros && decimals > 0 { +// // https://stackoverflow.com/questions/29560743/swift-remove-trailing-zeros-from-double +// floatDescription = floatDescription.replacingOccurrences(of: "^([\\d,]+)$|^([\\d,]+)\\.0*$|^([\\d,]+\\.[0-9]*?)0*$", with: "$1$2$3", options: .regularExpression) +// } + appendLiteral(floatDescription) + } + + /// Quick formatting for *hexadecimal* values. + mutating func appendInterpolation(hex: Int) { + let hexDescription = String(format: "0x%X", hex) + appendLiteral(hexDescription) + } + + /// Quick formatting for *percents*. + mutating func appendInterpolation(percent: Double, decimals: UInt = 2) { + let percentDescription = String(format: "%.\(decimals)f%%", percent * 100) + appendLiteral(percentDescription) + } + + /// Formats the *elapsed time* since the specified start time. 
+// don't use logging for profiling +// mutating func appendInterpolation(timeSince startTime: TimeInterval, decimals: UInt = 2) { +// let elapsedTime = CACurrentMediaTime() - startTime +// let elapsedTimeDescription = String(format: "%.\(decimals)fs", elapsedTime) +// appendLiteral(elapsedTimeDescription) +// } +} + +/* usage + let number = 1.2345 + "Float: \(float: number)" // "Float: 1.23" + "Float: \(float: number, decimals: 1)" // "Float: 1.2" + + let integer = 255 + "Hex: \(hex: integer)" // "Hex: 0xFF" + + let rate = 0.15 + "Percent: \(percent: rate)" // "Percent: 15.00%" + "Percent: \(percent: rate, decimals: 0)" // "Percent: 15%" + + let startTime = CACurrentMediaTime() + Thread.sleep(forTimeInterval: 2.8) + "∆t was \(timeSince: startTime)" // "∆t was 2.80s" + "∆t was \(timeSince: startTime, decimals: 0)" // "∆t was 3s" + */ + +private let log = Log("kram/App") + +public func clamp(_ value: T, _ minValue: T, _ maxValue: T) -> T where T : Comparable { + return min(max(value, minValue), maxValue) +} + + +//------------- + +class MyWebView : WKWebView { + + // So that keyboard events are routed + override var acceptsFirstResponder: Bool { true } + + // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/HandlingKeyEvents/HandlingKeyEvents.html + +// https://nshipster.com/wkwebview/ + + func isKeyHandled(_ event: NSEvent) -> Bool { + + // can't block delete or shift+delete + // so the WKWebView goes back/foward through it's 1 page history. + // that loses all context for the user. 
+ + // prevent super annoying bonk/NSBeep + // if don't check modifier flags (can't check isEmpty since 256 is often set + // then the cmd+S stops working + if !(event.modifierFlags.contains(.command) || + event.modifierFlags.contains(.control) || + event.modifierFlags.contains(.option)) { + // wasd + if event.keyCode == Keycode.w || event.keyCode == Keycode.a || event.keyCode == Keycode.s || event.keyCode == Keycode.d { + return true + } + } + return false + } + + // Apple doesn't want this to be overridden by user, but key handling + // just doesn't work atop the WKWebView without this. KeyUp/KeyDown + // overrides don't matter, since we need the WKWebView to forward them + override func performKeyEquivalent(with event: NSEvent) -> Bool { + if !isKeyHandled(event) { + return super.performKeyEquivalent(with: event) + } + return true + } + + /* still not working + override func performDragOperation(_ sender: NSDraggingInfo) -> Bool { + //let myWebView = superview as! MyWebView + //if let dropDelegate = myWebView.dropDelegate { + // return dropDelegate.webView(myWebView, performDragOperation: sender) + //} + return false + } + + // really want to shim drop + func shimDrag() { + // https://stackoverflow.com/questions/25096910/receiving-nsdraggingdestination-messages-with-a-wkwebview + + // Override the performDragOperation: method implemented on WKView so that we may get drop notification. + let originalMethod = class_getInstanceMethod(object_getClass(subviews[0]), #selector(NSDraggingDestination.performDragOperation(_:))) + let overridingMethod = class_getInstanceMethod(object_getClass(self), #selector(NSDraggingDestination.performDragOperation(_:))) + method_exchangeImplementations(originalMethod!, overridingMethod!) 
+ } + */ +} + +@MainActor +func newWebView(request: URLRequest) -> WKWebView { + // set preference to run javascript on the view, can then do PostMessage + let preferences = WKPreferences() + //preferences.javaScriptEnabled = true + //preferences.allowGPUOptimizedContents = true + + let webpagePreferences = WKWebpagePreferences() + webpagePreferences.allowsContentJavaScript = true + + let configuration = WKWebViewConfiguration() + configuration.preferences = preferences + configuration.defaultWebpagePreferences = webpagePreferences + + // here frame is entire screen + let webView = MyWebView(frame: .zero, configuration: configuration) + //webView.shimDrag() + + // The page is complaining it's going to lose the data. This disables swipe fwd/back. + // Still occuring because this doesn't disable the "delete" key which goes back in history + webView.allowsBackForwardNavigationGestures = false + + webView.load(request) + return webView +} + +// This is just an adaptor to allow WkWebView to interop with SwiftUI. +// It's unclear if WindowGroup can even have this hold state. +struct WebView : NSViewRepresentable { + //let request: URLRequest + let webView: WKWebView + + // This is set by caller to the url for the request + func makeNSView(context: Context) -> WKWebView { + return webView + } + + // can get data back from web view + func userContentController(_ userContentController: WKUserContentController, didReceive message: WKScriptMessage) { + if message.name == "postMessageListener" { + // Manage your Data + } + } + + // This is called to refresh the view + func updateNSView(_ webView: WKWebView, context: Context) { + + } + + // Here's sample code to do a screenshot, can this use actual dimensions + // https://nshipster.com/wkwebview/ +// func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) 
+// { +// var snapshotConfiguration = WKSnapshotConfiguration() +// snapshotConfiguration.snapshotWidth = 1440 +// +// webView.takeSnapshot(with: snapshotConfiguration) { (image, error) in +// guard let image = image, error == nil else { +// return +// } +// +// // TODO: save out the image +// } +// } + +} + +// TODO: fix the previewz +//#Preview { +// MyWKWebView() +//} + +//------------- + +/* +class MyMTKView: MTKView { + + + +} + +// wraps MTKView (NSView) into SwiftUI, so it can be a part of the hierarcy, +// updateNSView called when app foreground/backgrounded, or the size is changed +// also look at Tracy server +struct MTKViewWrapper: NSViewRepresentable { + var mtkView: MyMTKView + + // TODO: could hand this down without rebuilding wrapper, could be @Published from UserData + //var currentPath: String + + func makeNSView(context: NSViewRepresentableContext) -> MyMTKView { + return mtkView + } + + func updateNSView(_ view: MyMTKView, context: NSViewRepresentableContext) { + //view.updateUserData(currentPath: currentPath) + + } +} +*/ + +// https to work for some reason, but all data is previewed locally +let ORIGIN = "https://ui.perfetto.dev" + +// https://gist.github.com/pwightman/64c57076b89c5d7f8e8c +extension String { + var javaScriptEscapedString: String { + // Because JSON is not a subset of JavaScript, the LINE_SEPARATOR and PARAGRAPH_SEPARATOR unicode + // characters embedded in (valid) JSON will cause the webview's JavaScript parser to error. So we + // must encode them first. 
See here: http://timelessrepo.com/json-isnt-a-javascript-subset + // Also here: http://media.giphy.com/media/wloGlwOXKijy8/giphy.gif + let str = self.replacingOccurrences(of: "\u{2028}", with: "\\u2028") + .replacingOccurrences(of: "\u{2029}", with: "\\u2029") + // Because escaping JavaScript is a non-trivial task (https://github.com/johnezang/JSONKit/blob/master/JSONKit.m#L1423) + // we proceed to hax instead: + do { + let encoder = JSONEncoder() + let data = try encoder.encode([str]) + let encodedString = String(decoding: data, as: UTF8.self) + + // drop surrounding {}? + return String(encodedString.dropLast().dropFirst()) + } catch { + return self + } + } +} + + +extension View { + public func searchableOptional(text: Binding, isPresented: Binding, placement: SearchFieldPlacement = .automatic, prompt: S) -> some View where S : StringProtocol { + if #available(macOS 14.0, *) { + return self.searchable(text: text, isPresented: isPresented, placement: + placement, prompt: prompt) + } + else { + return self + } + } + + /* + // This one is hard to wrap, since KeyPress.result is macOS 14.0 only + public func onKeyPressOptional(_ key: KeyEquivalent, action: @escaping () -> KeyPress.Result) -> some View { + if #available(macOS 14.0, *) { + return onKeyPress(.upArrow, action: action) + } + else { + return self + } + } + */ +} + +// What if the start time in the file isn't 0.0 based for the start +struct TimeRange { + var timeStart: Double = 0.0 + var timeEnd: Double = 1.0 + + // The time range should take up 80% of the visible window. + var viewPercentage: Double = 0.8 +} + + + +func filenameToTimeRange(_ filename: String) -> TimeRange { + var duration = 1.0 + + switch File.filenameToFileType(URL(string: filename)!) 
{ + case .Build: duration = 1.0 + case .Memory: duration = 64.0 + case .Perf: duration = 0.1 // 100ms + case .Unknown: duration = 1.0 + } + + duration = 10.0 + + return TimeRange(timeStart:0.0, timeEnd:duration) +} + +func buildTimeRangeJson(_ timeRange:TimeRange) -> String? { + if timeRange.timeEnd == 0.0 { + return nil + } + + // This is in nanos + let timeStartInt = Int(timeRange.timeStart * 1e9) + let timeEndInt = Int(timeRange.timeEnd * 1e9) + + // Time is not found, it's in ui/src/base/time.ts + // Sending down nanos seems to work provided the number has n suffix + // TODO: Perfetto seems to only honor this the first time it's sent. + + // This one doesn't go thorugh JSON.parse() + // timeStart: Time.fromSeconds(\(timeRange.timeStart)), + // timeEnd: Time.fromSeconds(\(timeRange.timeEnd)), + + // The postMessage if using Json.stringify can't handle the BigInt + let script = """ + var objTime = { + perfetto:{ + keepApiOpen: true, + timeStart:\(timeStartInt)n, + timeEnd:\(timeEndInt)n, + viewPercentage:\(timeRange.viewPercentage) + }}; + """ + + return script +} + +// Flutter uses this to jump to a time range +func showTimeRangeJS(objTimeScript: String) -> String? { + + + // https://github.com/flutter/devtools/blob/master/packages/devtools_app/lib/src/screens/performance/panes/timeline_events/perfetto/_perfetto_web.dart#L174 + + + /* + + // The |time| type represents trace time in the same units and domain as trace + // processor (i.e. typically boot time in nanoseconds, but most of the UI should + // be completely agnostic to this). + export type time = Brand; + + https://github.com/google/perfetto/blob/45fe47bfe4111454ba7063b9b4d438369090d6ba/ui/src/common/actions.ts#L97 + export interface PostedScrollToRange { + timeStart: time; + timeEnd: time; // ugh? 
+ viewPercentage?: number; + } + + // https://github.com/flutter/devtools/blob/8bf64b754a4677b66d22fe6f1212bd72d1e789b8/packages/devtools_app/lib/src/screens/performance/panes/flutter_frames/flutter_frame_model.dart#L29 + + */ + + let script = """ + + // https://jsfiddle.net/vrsofx1p/ + function waitForUI(objTime) + { + const timer = setInterval(() => window.postMessage('PING', '\(ORIGIN)'), 50); + + const onMessageHandler = (evt) => { + if (evt.data !== 'PONG') return; + + // We got a PONG, the UI is ready. + window.clearInterval(timer); + window.removeEventListener('message', onMessageHandler); + + // was win, but use window instead + window.postMessage(objTime, '\(ORIGIN)'); + } + + window.addEventListener('message', onMessageHandler); + } + + waitForUI(objTime); + """ + + return objTimeScript + script +} + +// These are really json events from perfetto profile. +struct PerfettoEvent: Codable { + var cat: String? + var pid: Int? + var tid: Int? + var ph: String? + var ts: Int? + var dur: Int? + var name: String? + var args: [String : AnyCodable]? + + // var tts: Int? - thread clock timestamp + // var cname: String? - color name from table + // Also can have stack frames + + // These are data computed from the events + var durSub: Int? + var parentIndex: Int? + + // can't have setters on a Struct, only init + init(_ tid: Int, _ name: String, _ dur: Int) { + self.ts = 0 + self.tid = tid + self.dur = dur + self.name = name + self.ph = "X" + } + + init(tid: Int, threadName: String) { + self.ts = 0 + self.dur = 0 + self.name = "thread_name" + self.ph = "M" + self.tid = tid + self.args = [:] + self.args!["name"] = AnyCodable(threadName) + } + + // only encode/decode some of the keys + enum CodingKeys: String, CodingKey { + case cat, pid, tid, ph, ts, dur, name, args + } +} + +struct PerfettoProfile: Codable { + var traceEvents: [PerfettoEvent]? 
+ + // not a part of the perfetto spec, but clang writes this when it zeros + // out the startTime + var beginningOfTime: Int? +} + +class ThreadInfo : Hashable, Equatable, Comparable { + + var id: Int = 0 + var threadName: String = "" + var startTime: Int = Int.max + var endTime: Int = Int.min + var endTimeFree: Int = Int.min + var count: Int = 0 + + // id doesn't implement Hashable + func hash(into hasher: inout Hasher) { + hasher.combine(id) + } + + public static func == (lhs: ThreadInfo, rhs: ThreadInfo) -> Bool { + return lhs.id == rhs.id + } + public static func < (lhs: ThreadInfo, rhs: ThreadInfo) -> Bool { + return lhs.id < rhs.id + } + + func combine(_ s: Int, _ d: Int, _ name: String?) { + let isFreeBlock = name != nil && name! == "Free" + let e = s+d + + if isFreeBlock { + endTimeFree = max(endTimeFree, e) + + // If all free block, this doesn't work + // so update start/endTime assuming first block isn't Free + if startTime > endTime { + startTime = min(startTime, s) + endTime = max(endTime, e) + } + } + else { + startTime = min(startTime, s) + endTime = max(endTime, e) + } + + count += 1 + } + + var description: String { + let duration = Double(endTime - startTime) * 1e-6 + + // TODO: could display freeDuration (heap size) + var freeDuration = duration + if endTimeFree != Int.min { + freeDuration = Double(endTimeFree - startTime) * 1e-6 + } + let percentage = freeDuration > 0.0 ? 
((duration / freeDuration) * 100.0) : 0.0 + + // only display percentage if needed + if percentage > 99.9 { + return "\(id) '\(threadName)' \(double: duration, decimals:6)s \(count)x" + } + else { + return "\(id) '\(threadName)' \(double: duration, decimals:6)s \(double:percentage, decimals:0)% \(count)x" + } + } + +} + +class BuildFunctionTiming { + var count = 0 + var duration = 0 + + func combine(_ duration: Int) { + self.duration += duration + self.count += 1 + } + func combine(_ timing: BuildFunctionTiming) { + self.duration += timing.duration + self.count += timing.count + } +} + +class BuildFunctionTimings { + var optFunctions: [String:BuildFunctionTiming] = [:] + var instantiateFunctions: [String:BuildFunctionTiming] = [:] + + func combine(_ event: PerfettoEvent) { + if event.name == "OptFunction" { + let detail = event.args!["detail"]!.value as! String + let dur = event.dur! + + // With classes need to create a new one to combine into + if let f = optFunctions[detail] { + f.combine(dur) + } + else { + let f = BuildFunctionTiming() + f.combine(dur) + optFunctions[detail] = f + } + } + else if event.name == "InstantiateFunction" { + let detail = event.args!["detail"]!.value as! String + let dur = event.dur! 
+ + if let f = instantiateFunctions[detail] { + f.combine(dur) + } + else { + let f = BuildFunctionTiming() + f.combine(dur) + instantiateFunctions[detail] = f + } + } + } + + func combine(_ timings: BuildFunctionTimings, _ collapseNames: Bool = false) { + for pair in timings.optFunctions { + var detail = pair.key + let timing = pair.value + + // go out to CBA to collapse the names + if collapseNames { + // skip non-templates + if detail.firstIndex(of: "<") == nil { continue } + + if let newDetail = collapseFunctionName(detail) { + detail = String(cString: newDetail) + } + } + + if let f = optFunctions[detail] { + f.combine(timing) + } + else { + let f = BuildFunctionTiming() + f.combine(timing) + optFunctions[detail] = f + } + } + for pair in timings.instantiateFunctions { + var detail = pair.key + let timing = pair.value + + // go out to CBA to collapse the names + if collapseNames { + // skip non-templates + if detail.firstIndex(of: "<") == nil { continue } + + if let newDetail = collapseFunctionName(detail) { + detail = String(cString: newDetail) + } + } + + if let f = instantiateFunctions[detail] { + f.combine(timing) + } + else { + let f = BuildFunctionTiming() + f.combine(timing) + instantiateFunctions[detail] = f + } + } + } + + func reset() { + optFunctions.removeAll() + instantiateFunctions.removeAll() + } +} + +// Could also process each build timings in a threaded task. That what CBA is doing. +class BuildTiming: NSCopying { + var name = "" // duped with key of map + var type = "" + var count = 0 + var duration = 0 + var durationSub = 0 + var durationSelf: Int { return max(0, duration - durationSub) } + + func combine(_ duration: Int, _ durationSub: Int) { + self.duration += duration + self.durationSub += durationSub + self.count += 1 + } + + func combine(_ timing: BuildTiming) { + self.duration += timing.duration + self.durationSub += timing.durationSub + self.count += timing.count + } + + // This is annoying in Swift + func copy(with zone: NSZone? 
= nil) -> Any { + let copy = BuildTiming() + copy.name = name + copy.count = count + copy.duration = duration + copy.durationSub = durationSub + copy.type = type + return copy + } +} + +func updateFileBuildTimings(_ events: [PerfettoEvent]) -> [String:BuildTiming] { + var buildTimings: [String:BuildTiming] = [:] + + // DONE: would be nice to compute the self times. This involves + // sorting the startTime on a track, then by largest duration on ties + // and then subtracting the immediate children. + // See what CBA and Perfetto do to establish this. + + // Would be good to establish this nesting once and store the level + // with each event. + + // run through each file, and build a local map of name to size count + for i in 0.. [File] { + let selectedFile = lookupFile(url:URL(string:selection)!) + let isArchive = selectedFile.archive != nil + + let filteredFiles = files.filter { file in + if isArchive { + return file.archive != nil && file.archive! == selectedFile.archive! + } + else { + return file.parentFolders == selectedFile.parentFolders + } + } + + return filteredFiles; +} + +func postBuildTimingsReport(files: [File]) -> String? 
{ + let buildTimings = mergeFileBuildTimings(files: files) + if buildTimings.isEmpty { return nil } + let buildStats = mergeFileBuildStats(files:files) + + // merge the function stats + // TODO: could to more to highlight and crunch template strings + let buildFunctionTimings = BuildFunctionTimings() + for file in files { + buildFunctionTimings.combine(file.buildFunctionTimings) + } + + // Compute more consolidation by collapsing names + let buildTemplateFunctionTimings = BuildFunctionTimings() + buildTemplateFunctionTimings.combine(buildFunctionTimings, true) + + let buildJsonBase64 = generateBuildReport( + buildTimings: buildTimings, + buildFunctionTimings: buildFunctionTimings, + buildTemplateFunctionTimings: buildTemplateFunctionTimings, + buildStats: buildStats) + + let buildJS = postLoadFileJS(fileContentBase64: buildJsonBase64, title: "BuildTimings") + return buildJS +} + +func mergeFileBuildStats(files: [File]) -> BuildStats { + let buildStats = BuildStats() + for file in files { + if file.buildStats != nil { + buildStats.combine(file.buildStats!) + } + } + + buildStats.frontendStart = 0 + buildStats.backendStart = buildStats.totalFrontend + + // This will scale way beyond the graph, so make it an average + // But will have lost the totals doing this. Could embed them in string. + buildStats.divideBy(10) + + return buildStats +} + +func mergeFileBuildTimings(files: [File]) -> [String:BuildTiming] { + var buildTimings: [String:BuildTiming] = [:] + + // run through all files, and zip the maps together + // then turn that into build events that can be shown. + for file in files { + // merge and combine duplicates + for buildTiming in file.buildTimings { + let v = buildTiming.value + if let bt = buildTimings[buildTiming.key] { + bt.combine(v.duration, v.durationSub) + } + else { + // need to copy to setup name/type + buildTimings[buildTiming.key] = (v.copy() as! 
BuildTiming) + } + } + // buildTimings.merge didn't work, combine src values + } + + return buildTimings +} + +func generateBuildReport(buildTimings: [String:BuildTiming], + buildFunctionTimings: BuildFunctionTimings, + buildTemplateFunctionTimings: BuildFunctionTimings, + buildStats: BuildStats) -> String +{ + // now convert those timings back into a perfetto displayable report + // So just need to build up the json above into events on tracks + var events: [PerfettoEvent] = [] + + // Also sort or assign a sort_index to the tracks. Sort biggest to smallest. + // Make the threadName for the track be the short filename. + + // add the thread names, only using 3 threads + if true { + let names = ["ParseTime", "ParseCount", "ParseSelf", + "OptimizeTime", + "InstFunc", "OptimizeFunc", + "InstTplFunc", "InstTplCount", "OptimizeTplFunc" + ] + for i in 0.. 0 { + event.name = "\(shortFilename) \(t.count)x \(double: durSelf, decimals:2, zero: false)s" + + // ParseSelf + event.tid = 3 + event.dur = t.durationSelf + events.append(event) + } + } + else if isOptimize { + // for now skip small contributions + let percent = Double(t.duration) * optimizeTimingInv + if percent < 0.01 { continue } + + event.name = "\(shortFilename) \(double: dur, decimals:2, zero: false)s" + + // OptimizeTime + event.tid = 4 + events.append(event) + } + } + + func printTimings(_ functions: [String:BuildFunctionTiming], _ event: inout PerfettoEvent, _ events: inout [PerfettoEvent], isCount: Bool = false ) { + // compute inverse timings + var timing = 0 + for time in functions.values { + timing += time.duration + } + let timingInv = 1.0 / Double(timing) + + // dump the highest duration + for tPair in functions{ + let duration = tPair.value.duration + let count = tPair.value.count + + let percent = Double(duration) * timingInv + if percent < 0.01 { continue } + + let dur = Double(duration) * 1e-6 + event.name = "\(tPair.key) \(double: dur, decimals:2, zero: false)s \(count)x" + if isCount { + // in 
0.1s per count so they show up + event.dur = count * 10000 + } + else { + event.dur = duration + } + events.append(event) + } + } + + let doFunctionTimings = true + if doFunctionTimings { + // function (templates + regular) + event.tid = 5 + printTimings(buildFunctionTimings.instantiateFunctions, &event, &events) + + event.tid = 6 + printTimings(buildFunctionTimings.optFunctions, &event, &events) + + //------ + // templates + event.tid = 7 + printTimings(buildTemplateFunctionTimings.instantiateFunctions, &event, &events) + + event.tid = 8 + printTimings(buildTemplateFunctionTimings.instantiateFunctions, &event, &events, isCount: true) + + event.tid = 9 + printTimings(buildTemplateFunctionTimings.optFunctions, &event, &events) + } + + events.sort { + // want threadnames first, could just prepend these to array? + if $0.ph! != $1.ph! { + return $0.ph! < $1.ph! + } + + // then thread id + if $0.tid! != $1.tid! { + return $0.tid! < $1.tid! + } + + // then duration + // has to be > to work as a single tid + if $0.dur != $1.dur! { + return $0.dur! > $1.dur! + } + + // then name + return $0.name! < $1.name! 
+ } + + // add in the summary of % spent across the build + let totalTrackEvents = convertStatsToTotalTrack(buildStats) + events += totalTrackEvents + + let perfettoProfile = PerfettoProfile(traceEvents: events) + + do { + // json encode, compress, and then base64 encode that + let encoder = JSONEncoder() + let fileContentFixed = try encoder.encode(perfettoProfile) + + // gzip compress the data before sending it over + guard let compressedData = fileContentFixed.gzip() else { return "" } + let fileContentBase64 = compressedData.base64EncodedString() + + return fileContentBase64 + } + catch { + log.error(error.localizedDescription) + } + + return "" +} + + +// TODO: Hook this up for memory traces, build more efficient array of thread events +func sortThreadsByName(_ perfettoProfile: inout PerfettoProfile) { + + var threads: [Int: [Int]] = [:] + + // first sort each thread + for i in 0.., but having trouble with lookup + var threadInfos: [Int: ThreadInfo] = [:] + + for i in 0.. Double { + var startTime = Int.max + var endTime = Int.min + + for i in 0.. e1.ts! + } + + // later events assumed to be parent with "X" events written out when done + return $0 > $1 + } + + // so now can look at the range of a node vs. next node + var root = 0; + + // skip the thread names + var buildThreadId = Int.max + while events[sortedIndices[root]].ph! == "M" { + buildThreadId = min(buildThreadId, events[sortedIndices[root]].tid!) + root += 1 + } + + var evRootIndex = sortedIndices[root] + var evRoot = events[evRootIndex]; + events[evRootIndex].parentIndex = Int(-1) + + for i in (root+1)..= evRoot.ts! && ev2.ts!+ev2.dur! <= evRoot.ts!+evRoot.dur! { + events[ev2Index].parentIndex = Int(root) + + // Could store children for full hierarchy, but don't need this + //evRoot.children.append(evt2Index); + + // All flamegraph really needs is for events to store a level + // of how deep they are on a given thread. Having to make this up is kinda costly. 
+ + // Can create selfTime by subtracting durations of all children + // if the name matches (so Source from Source) + if ev2.name == evRoot.name { + if events[evRootIndex].durSub == nil { + events[evRootIndex].durSub = Int() // 0 + } + events[evRootIndex].durSub! += ev2.dur! + } + break; + } + + // walk up to the next parent + root = evRoot.parentIndex! + if root != -1 { + evRootIndex = sortedIndices[root] + evRoot = events[evRootIndex] + } + } + if root == -1 { + events[ev2Index].parentIndex = -1 + } + + // move the root to the one we found + root = i; + evRootIndex = sortedIndices[i] + evRoot = events[evRootIndex] + } +} + + +class Timer { + private static let kTickToSeconds = updateTimebase() + private var time: Double = -Timer.getTime() + + deinit { + stop() + } + + func timeElapsed() -> Double { + return time + } + + func restart() { + if time > 0.0 { + time = -Timer.getTime() + } + } + + func start() { + if time > 0.0 { + time -= Timer.getTime() + } + } + + func stop() { + if time < 0.0 { + time += Timer.getTime() + } + } + + private static func getTime() -> Double { + return Double(mach_absolute_time()) * kTickToSeconds + } + + private static func updateTimebase() -> Double { + var machTimebase = mach_timebase_info(numer: 0, denom: 0) + mach_timebase_info(&machTimebase) + + // AS = 125/3, Intel = 1/1 + return 1e-9 * Double(machTimebase.numer) / Double(machTimebase.denom) + } +} + +// Fire this off any time the list changes and there +// are build events in it. This will update the data within, +// so that the user doesn't have to visit every file manually. +func updateBuildTimingsTask(_ files: [File]) { + // Can use counter for progress. Could add up size instead of just count. 
+ var counter = 0 + for file in files { + if !file.buildTimings.isEmpty { return } + + counter += 1 + } + + if counter == 0 { return } + + #if false + + + let backgroundTaskGroup = await withTaskGroup(of: Void.self) { group in + for file in files { + if file.fileType == .Build { + _ = group.addTaskUnlessCancelled { + guard Task.isCancelled == false else { return } + + do { + try await updateBuildTimingTask(file) + } + catch { + log.error(error.localizedDescription) + } + + } + } + } + } + + #else + + let _ = Task(priority: .medium, operation: { + let timer = Timer() + + for file in files { + + if file.fileType == .Build { + do { + try updateBuildTimingTask(file) + } + catch { + log.error(error.localizedDescription) + } + } + } + + timer.stop() + log.info("finished updating build timings in \(double:timer.timeElapsed(), decimals:3)s") + }) + + #endif +} + +func updateBuildTimingTask(_ file: File) /*async */ throws { + assert(file.fileType == .Build) + + // only checking this, and not duration == 0 + if !file.buildTimings.isEmpty { return } + + let fileContent = loadFileContent(file) + + var json : Data + + if file.containerType == .Compressed { + guard let unzippedContent = fileContent.gunzip() else { + return + } + json = unzippedContent + } + else if file.containerType == .Archive { + // this has already been extracted and decrypted + json = fileContent + } + else { + json = fileContent + } + + let decoder = JSONDecoder() + let perfettoProfile = try decoder.decode(PerfettoProfile.self, from: json) + if perfettoProfile.traceEvents == nil { // an array + return + } + + var events = perfettoProfile.traceEvents! + + // demangle the OptFunction name + for i in 0.. BuildStats { + let stats = BuildStats() + + // useful totals to track, many more in the files + for i in 0.. [PerfettoEvent] { + + var totalEvents: [PerfettoEvent] = [] + + // This is really ugly, change to using class? 
+ + let tid = 0 + let trackEvent = PerfettoEvent(tid: tid, threadName: "Build Totals") + totalEvents.append(trackEvent) + + // This is a struct, so can modify copy and add + var event: PerfettoEvent + + func makeDurEvent(_ tid: Int, _ name: String, _ dur: Int, _ total: Int) -> PerfettoEvent { + let percent = 100.0 * Double(dur) / Double(total) + return PerfettoEvent(tid, "\(double:percent, decimals:0)% \(name)", dur) + } + let total = stats.totalExecuteCompiler + + // Removed Total from all these strings + + event = makeDurEvent(tid, "ExecuteCompiler", stats.totalExecuteCompiler, total) + totalEvents.append(event) + + event = makeDurEvent(tid, "Frontend", stats.totalFrontend, total) + event.ts = stats.frontendStart + totalEvents.append(event) + + // sub-areas of frontend + event = makeDurEvent(tid, "Source", stats.totalSource, total) + event.ts = stats.frontendStart + totalEvents.append(event) + + event = makeDurEvent(tid, "InstantiateFunction", stats.totalInstantiateFunction, total) + event.ts = stats.frontendStart + stats.totalSource + totalEvents.append(event) + + // This is nearly always bigger than its parent total 14% vs. 16%, so are totals wrong + var totalInstantiateClass = stats.totalInstantiateClass + if totalInstantiateClass > stats.totalInstantiateFunction { + totalInstantiateClass = stats.totalInstantiateFunction + } + + // This overlaps with some Source, and some InstantiateFunction, so it's sort of double + // counted, so clamp it for now so Perfetto doesn't freak out and get the event order wrong. 
+ event = makeDurEvent(tid, "InstantiateClass", totalInstantiateClass, total) + event.ts = stats.frontendStart + stats.totalSource + totalEvents.append(event) + + // This total can exceed when backend start, so clamp it too + let tsCodeGenFunction = stats.frontendStart + stats.totalSource + stats.totalInstantiateFunction + + var totalCodeGenFunction = stats.totalCodeGenFunction + if tsCodeGenFunction + totalCodeGenFunction > stats.backendStart { + totalCodeGenFunction = stats.backendStart - tsCodeGenFunction + } + + event = makeDurEvent(tid, "CodeGen Function", totalCodeGenFunction, total) + event.ts = tsCodeGenFunction + totalEvents.append(event) + + + // can gen a lot of debug types, and clamp to backend + let tsDebugType = tsCodeGenFunction + totalCodeGenFunction + + var totalDebugType = stats.totalDebugType + if totalDebugType + totalDebugType > stats.backendStart { + totalDebugType = stats.backendStart - tsDebugType + } + + event = makeDurEvent(tid, "Debug", totalDebugType, total) + event.ts = tsDebugType + totalEvents.append(event) + + // backend + event = makeDurEvent(tid, "Backend", stats.totalBackend, total) + event.ts = stats.backendStart + totalEvents.append(event) + + event = makeDurEvent(tid, "Optimizer", stats.totalOptimizer, total) + event.ts = stats.backendStart + totalEvents.append(event) + + // event = makeDurEvent(tid, "OptModule", stats.totalOptModule, total) + // event.ts = stats.backendStart + stats.totalOptimizer + // totalEvents.append(event) + + event = makeDurEvent(tid, "CodeGenPasses", stats.totalCodeGenPasses, total) + event.ts = stats.backendStart + stats.totalOptimizer + totalEvents.append(event) + + event = makeDurEvent(tid, "OptFunction", stats.totalOptFunction, total) + event.ts = stats.backendStart + stats.totalOptimizer + totalEvents.append(event) + + /* + "Total ExecuteCompiler" + "Total Frontend" + "Total Source" + "Total InstantiateFunction" + "Total InstantiateClass" + "Total Codegen Function" + + + "Total Backend" + "Total 
Optimizer" + "Total CodeGenPasses" + "Total OptModule" + "Total OptFunction" + */ + + return totalEvents +} + +func replaceFunctionNamespaces(_ detail: inout String) { + // replace namespaces in the detail + let namespaces = ["std::", "kram::", "eastl::"] + for namespace in namespaces { + detail = detail.replacing(namespace, with:"") + } +} + +func loadFileJS(_ file: File) -> String? { + + do { + // use this for binary data, but need to fixup some json before it's sent + var fileContentBase64 = "" + + let isFileGzip = file.containerType == .Compressed + + // decompress archive from zip, since Perfetto can't yet decompress zip + // Note this will typically be fileType unknown, but have to flatten + // content within to the list. This just means part of a zip archive. + let fileContent = loadFileContent(file) + + let isBuildFile = file.fileType == .Build + + if !isBuildFile { + // perfetto only supports gzip, comments indicate zip is possible but only with refactor + // don't recompress gzip, note can't do timing if not decompressed + let doCompress = !isFileGzip + + // This is how Perfetto guesses as to format. Why no consistent 4 char magic? + // https://cs.android.com/android/platform/superproject/main/+/main:external/perfetto/src/trace_processor/forwarding_trace_parser.cc;drc=30039988b8b71541ce97f9fb200c96ba23da79d7;l=176 + + fileContentBase64 = fileContent.base64EncodedString() + + if !isFileGzip { + // see if it's binary or json. If binary, then can't parse duration below + // https://forums.swift.org/t/improving-indexing-into-swift-strings/41450/18 + let jsonDetector = "ewoiZG" // "{\"" + let jsonDetector2 = "eyJ0cm" // utf16? 
+ let firstTwoChars = fileContentBase64.prefix(6) + let isJson = firstTwoChars == jsonDetector || firstTwoChars == jsonDetector2 + + // convert to gzip format, so send less data across to Safari + if doCompress { + guard let compressedData: Data = fileContent.gzip() else { return nil } + fileContentBase64 = compressedData.base64EncodedString() + } + + // walk the file and compute the duration if we don't already have it + if isJson && file.duration == 0.0 { + let decoder = JSONDecoder() + let perfettoProfile = try decoder.decode(PerfettoProfile.self, from: fileContent) + + if perfettoProfile.traceEvents == nil { + return nil + } + + file.duration = updateDuration(perfettoProfile.traceEvents!) + + // For now, just log the per-thread info + if file.fileType == .Memory { + updateThreadInfo(perfettoProfile, file) + } + + // This mods the perfetto profile to store parentIndex and durSub + // the call has build specific code right now + //else if file.fileType == .Perf { + // computeEventParentsAndDurSub(&perfettoProfile) + //} + } + } + } + else { + // The data for these is being generated in an async task + // So that the build report can be generated. + + // Clang has some build totals as durations on fake threads + // but those are smaller than the full duration. 
+ var json : Data + + if file.containerType == .Compressed { + guard let unzippedContent = fileContent.gunzip() else { + return nil + } + json = unzippedContent + } + else if file.containerType == .Archive { + // this has already been extracted and decrypted + json = fileContent + } + else { + json = fileContent + } + + // here having to ungzip and decode just to display the content + // have already processed the build files in an async task + let decoder = JSONDecoder() + var perfettoProfile = try decoder.decode(PerfettoProfile.self, from: json) + + if perfettoProfile.traceEvents == nil { + return nil + } + + // demangle the OptFunction name + // This is the only name not demangled + // https://github.com/llvm/llvm-project/issues/45901 + for i in 0.. String? +{ + do { + // https://stackoverflow.com/questions/62035494/how-to-call-postmessage-in-wkwebview-to-js + struct PerfettoFile: Codable { + var buffer: String // really ArrayBuffer, but will get replaced + var title: String + + // About keepApiOpen + // https://github.com/flutter/devtools/blob/master/packages/devtools_app/lib/src/screens/performance/panes/timeline_events/perfetto/_perfetto_web.dart#L174 + var keepApiOpen: Bool + + // optional fields + //var fileName: String? + // url cannot be file://, has to be http served. Can we set fileName? + //var url: String? 
+ } + + struct Perfetto: Codable { + var perfetto: PerfettoFile + } + + // really the url is the only part that needs encoded + let perfetto = Perfetto(perfetto: PerfettoFile(buffer: "", + title: title, + keepApiOpen: true)) + var perfettoEncode = "" + + if true { + let encoder = JSONEncoder() + let data = try encoder.encode(perfetto) + let encodedString = String(decoding: data, as: UTF8.self) + perfettoEncode = String(encodedString.dropLast().dropFirst()) + perfettoEncode = perfettoEncode + .replacingOccurrences(of: "\u{2028}", with: "\\u2028") + .replacingOccurrences(of: "\u{2029}", with: "\\u2029") + } + + let script = """ + + //function bytesToBase64(bytes) { + // const binString = String.fromCodePoint(...bytes); + // return btoa(binString); + + function base64ToBytes(base64) { + const binString = atob(base64); + return Uint8Array.from(binString, (m) => m.codePointAt(0)); + } + + var fileData = '\(fileContentBase64)'; + + // convert from string -> Uint8Array -> ArrayBuffer + var obj = JSON.parse('{\(perfettoEncode)}'); + + // convert base64 back + obj.perfetto.buffer = base64ToBytes(fileData).buffer; + + // Fix race between page load, and loading the file. Although + // page is only loaded once. + + // What if last listener isn't complete, or is doing the postMessage + // JS is all running on one thread though? + + // https://jsfiddle.net/vrsofx1p/ + function waitForUI(obj) + { + const timer = setInterval(() => window.postMessage('PING', '\(ORIGIN)'), 50); + + const onMessageHandler = (evt) => { + if (evt.data !== 'PONG') return; + + // We got a PONG, the UI is ready. + window.clearInterval(timer); + window.removeEventListener('message', onMessageHandler); + + window.postMessage(obj, '\(ORIGIN)'); + } + + window.addEventListener('message', onMessageHandler); + } + + waitForUI(obj); + """ + +// This was trying to block the native drop handler +// // ugh, document does notwork either +// if (false) { +// // tried adding to various parts above. 
Need to install +// // after the page is open, but this doesn't override the default +// // turn off drop handling, or it won't fixup json or appear in list +// // This doesn't work +// window.addEventListener('drop', function(e) { +// e.preventDefault(); +// e.stopPropagation(); +// }); +// window.addEventListener('dragover', function(e) { +// e.preventDefault(); +// e.stopPropagation(); +// }); +// } + + return script + } + catch { + log.error(error.localizedDescription) + return nil + } +} + +class AppDelegate: NSObject, NSApplicationDelegate { + // Where to set this + var window: NSWindow? + + // don't rename params in this class. These are the function signature + func applicationShouldTerminateAfterLastWindowClosed(_ application: NSApplication) -> Bool { + return true + } +} + +@main +struct kram_profileApp: App { + //@State private var files: [File] = [] + + // @State private var files: [File] = [] + @State private var selection: String? + + // close app when last window is + @NSApplicationDelegateAdaptor private var appDelegate: AppDelegate + + func runJavascript(_ webView: WKWebView, _ script: String) { + webView.evaluateJavaScript(script) { (result, error) in + if error != nil { + log.error("problem running script") + } + } + } + + + // What is used when Inter isn't installed. Can this be bundled? 
+ //let customFont = Font.custom("Inter Variable", size: 14) + + func openFileFromURLs(urls: [URL]) { + droppedFileCache = urls + reopenFileFromURLs() + + // update the document list + let documentController = NSDocumentController.shared + if urls.count >= 1 { + for url in urls { + if url.hasDirectoryPath || isSupportedFilename(url) { + documentController.noteNewRecentDocumentURL(url) + } + } + } + } + + func reopenFileFromURLs() { + let urls = droppedFileCache + + if urls.count >= 1 { + let filesNew = listFilesFromURLs(urls) + + // for now wipe the old list + if filesNew.count > 0 { + // turning this off for now, File must impl Hashable + let mergeFiles = NSEvent.modifierFlags.contains(.option); + + if mergeFiles { + fileSearcher.files = Array(Set(fileSearcher.files + filesNew)) + } + else + { + // reset the list + fileSearcher.files = filesNew + } + + fileSearcher.updateFilesSorted() + + // task to update any build timings + // this saves having to manually visit every file + updateBuildTimingsTask(fileSearcher.files) + + log.debug("found \(fileSearcher.files.count) files") + + // preserve the original selection if still present + if selection != nil { + var found = false + for file in fileSearcher.filesSorted { + if file.id == selection { + found = true + break; + } + } + + // load first file in the list + if !found { + selection = fileSearcher.filesSorted[0].id + } + } + else { + // load first file in the list + selection = fileSearcher.filesSorted[0].id + } + } + } + } + + // This isn't so valuable to open a file, but opening a referenced header from build + // would be. But would to respond to/retrieve selection in JS side. + func openContainingFolder(_ str: String) { + let url = URL(string: str)! + NSWorkspace.shared.activateFileViewerSelecting([url]); + } + +// func isReloadEnabled(_ selection: String?) 
-> Bool { +// guard let sel = selection else { return false } +// let file = lookupFile(selection: sel) +// return file.isReloadNeeded() +// } + + func openFile() { + let panel = NSOpenPanel() + panel.allowsMultipleSelection = true + panel.canChooseDirectories = true + panel.canChooseFiles = true + panel.allowedContentTypes = fileTypes + + panel.begin { reponse in + if reponse == .OK { + openFileFromURLs(urls: panel.urls) + } + } + } + + func openFileSelection(_ webView: WKWebView) { + if let sel = selection { + + let file = lookupFile(selection: sel) + + // This should only reload if selection previously loaded + // to a valid file, or if modstamp changed on current selection + + // TODO: fix this + let objTimeScript: String? = nil // buildTimeRangeJson(filenameToTimeRange(sel)) + + var str = loadFileJS(file) + if str != nil { + runJavascript(webView, str!) + + // This means Perfetto UI loaded the fileContent, not that fileContent was loaded + file.setLoadStamp() + } + + // Want to be able to lock the scale of the + // trace, so that when moving across traces the range is consistent. + // Otherwise, small traces get expanded to look huge. + // This only works the first time a file loads. + if objTimeScript != nil { + str = showTimeRangeJS(objTimeScript: objTimeScript!) + if str != nil { + runJavascript(webView, str!) + } + } + } + } + + func aboutPanel() { + NSApplication.shared.orderFrontStandardAboutPanel( + options: [ + NSApplication.AboutPanelOptionKey.credits: NSAttributedString( + string: +""" +A tool to help profile mem, perf, and builds. +© 2020-2024 Alec Miller +""", + + attributes: [ + // TODO: fix font + NSAttributedString.Key.font: NSFont.boldSystemFont(ofSize: NSFont.smallSystemFontSize) + ] + ), + NSApplication.AboutPanelOptionKey( + rawValue: "kram-profile" + ): "© 2020-2024 Alec Miller" + ] + ) + } + + let fileTypes: [UTType] = [ + // This is what macOS generates when doing "compress file". But could be archive. 
+ // These are 12x smaller often times. Decompression lib only handles zlib. + .zip, + + // Perfetto can only open gzip and not zip yet + // These are 12x smaller often times + .gzip, + + // A mix of json or binary format files + .json, // clang build files + UTType(filenameExtension:"trace", conformingTo:.data)!, // conformingTo: .json didn't work + UTType(filenameExtension:"memtrace", conformingTo:.data)!, + UTType(filenameExtension:"perftrace", conformingTo:.data)!, + UTType(filenameExtension:"buildtrace", conformingTo:.data)!, + ] + + func selectFile(_ selection: String?, _ fileList: [File], _ advanceList: Bool) -> String? { + guard let sel = selection else { return nil } + if fileList.count == 1 { return selection } + + // TOOD: fix this for search, where the selected item may no longer be + // in the list, find element in the list bounding it + var index = 0 + for i in 0.. Bool { + + // TODO: faster way to find index of file + // probably worth caching up these + for i in 0.. String { + if selection == nil { + return "" + } + return lookupFile(selection: selection!).threadInfo + } + + func isMemoryFileType(_ selection: String?) -> Bool { + if selection == nil { + return false + } + return lookupFile(selection: selection!).fileType == .Memory + } + + // iOS can go compact reduces to 1 column + // can add as a subheading of the text then +// private var isCompact: Bool { +// horizontalSizeClass == .compact +// } + + // here's about inheriting from CoreData, @FetchRquest, NSManagedObject, @NSManaged, etc. + // so it's retained, and not dealing with silly Swift struct getting destroyed. + // and using a class. 
+ // https://useyourloaf.com/blog/swiftui-tables-quick-guide/ + + // Comparators aren't same as KeyPathComparator, ugh + // https://useyourloaf.com/blog/custom-sort-comparators/ + + let buildIcon = Image(systemName: "c.square") // compile + let perfIcon = Image(systemName: "p.square") + let memoryIcon = Image(systemName: "m.square") + let unknownIcon = Image(systemName: "questionmark.app") + + // https://www.hackingwithswift.com/example-code/uikit/how-to-use-system-icons-in-your-app + func generateIcon(_ file: File) -> Image { + switch file.fileType { + case .Build: return buildIcon + case .Memory: return memoryIcon + case .Perf: return perfIcon + case .Unknown: return unknownIcon + } + } + + func recentDocumentsMenu() -> some View { + let documentController = NSDocumentController.shared + let urls = documentController.recentDocumentURLs + + return Menu("Recent Documents…") { + ForEach(0..:Debug>") + + # to turn off exceptions/rtti use /GR and /EHsc replacement +string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + +# all warnings, AVX2, and multiprocess compiles +target_compile_options(${myTargetLib} PRIVATE /W3 -march=haswell -mf16c -mfma /MP /GF /FC) + +target_compile_definitions(${myTargetLib} PRIVATE -D_ITERATOR_DEBUG_LEVEL=0 -D_HAS_EXCEPTIONS=0 -DUNICODE -D_UNICODE) + +if (CMAKE_BUILD_TYPE EQUAL "Debug") + target_compile_definitions(${myTargetLib} PRIVATE "/INCREMENTAL") + +elseif (CMAKE_BUILD_TYPE EQUAL "Release") + # only dead strip on Release builds since this disables Incremental linking, may want Profile build that doesn't use this + target_compile_definitions(${myTargetLib} PRIVATE "/OPT:REF") +endif() + +target_link_libraries(${myTargetLib} PRIVATE shlwapi.lib libkram) diff --git a/kram-thumb-win/Dll.cpp b/kram-thumb-win/Dll.cpp new file mode 100644 index 00000000..267e3c5e --- /dev/null +++ b/kram-thumb-win/Dll.cpp @@ -0,0 +1,250 @@ +// based on QOI 
Thumbnail Provider for Windows Explorer +// Written by iOrange in 2021 +// +// Based on Microsoft's example +// https://github.com/microsoft/windows-classic-samples/tree/main/Samples/Win7Samples/winui/shell/appshellintegration/RecipeThumbnailProvider +// +// Also more info here: +// https://docs.microsoft.com/en-us/previous-versions/windows/desktop/legacy/cc144118(v=vs.85) + +#include +#include // For SHChangeNotify +#include +#include // For IThumbnailProvider. + +#include +#include +#include // For std::size + +// from KramThumbProvider.cpp +extern HRESULT KramThumbProvider_CreateInstance(REFIID riid, void** ppv); + +#define SZ_KramTHUMBHANDLER L"Kram Thumbnail Handler" + +// TODO: update CLSID here, is this a fixed id since Win7 vs. Vista said a provider is different ids? +// keepd in sync with kCLSID +// Just a different way of expressing CLSID in the two values below +// made with uuidgen.exe +#define SZ_CLSID_KramTHUMBHANDLER L"{a9a47ef5-c238-42a9-a4e6-a85558811dac}" +constexpr CLSID kCLSID_KramThumbHandler = {0xa9a47ef5, 0xc238, 0x42a9, {0xa4, 0xe6, 0xa8, 0x55, 0x58, 0x81, 0x1d, 0xac}}; + +typedef HRESULT (*PFNCREATEINSTANCE)(REFIID riid, void** ppvObject); +struct CLASS_OBJECT_INIT { + const CLSID* pClsid; + PFNCREATEINSTANCE pfnCreate; +}; + +// add classes supported by this module here +constexpr CLASS_OBJECT_INIT kClassObjectInit[] = { + {&kCLSID_KramThumbHandler, KramThumbProvider_CreateInstance}}; + +std::atomic_long gModuleReferences(0); +HINSTANCE gModuleInstance = nullptr; + +// Standard DLL functions +STDAPI_(BOOL) +DllMain(HINSTANCE hInstance, DWORD dwReason, void*) +{ + if (DLL_PROCESS_ATTACH == dwReason) { + gModuleInstance = hInstance; + ::DisableThreadLibraryCalls(hInstance); + } + else if (DLL_PROCESS_DETACH == dwReason) { + gModuleInstance = nullptr; + } + return TRUE; +} + +STDAPI DllCanUnloadNow() +{ + // Only allow the DLL to be unloaded after all outstanding references have been released + return (gModuleReferences > 0) ? 
S_FALSE : S_OK; +} + +void DllAddRef() +{ + ++gModuleReferences; +} + +void DllRelease() +{ + --gModuleReferences; +} + +class CClassFactory : public IClassFactory { +public: + static HRESULT CreateInstance(REFCLSID clsid, const CLASS_OBJECT_INIT* pClassObjectInits, size_t cClassObjectInits, REFIID riid, void** ppv) + { + *ppv = NULL; + HRESULT hr = CLASS_E_CLASSNOTAVAILABLE; + for (size_t i = 0; i < cClassObjectInits; ++i) { + if (clsid == *pClassObjectInits[i].pClsid) { + IClassFactory* pClassFactory = new (std::nothrow) CClassFactory(pClassObjectInits[i].pfnCreate); + hr = pClassFactory ? S_OK : E_OUTOFMEMORY; + if (SUCCEEDED(hr)) { + hr = pClassFactory->QueryInterface(riid, ppv); + pClassFactory->Release(); + } + break; // match found + } + } + return hr; + } + + CClassFactory(PFNCREATEINSTANCE pfnCreate) + : mReferences(1), mCreateFunc(pfnCreate) + { + DllAddRef(); + } + + virtual ~CClassFactory() + { + DllRelease(); + } + + // IUnknown + IFACEMETHODIMP QueryInterface(REFIID riid, void** ppv) + { + static const QITAB qit[] = { + QITABENT(CClassFactory, IClassFactory), + {0}}; + return QISearch(this, qit, riid, ppv); + } + + IFACEMETHODIMP_(ULONG) + AddRef() + { + return ++mReferences; + } + + IFACEMETHODIMP_(ULONG) + Release() + { + const long refs = --mReferences; + if (!refs) { + delete this; + } + return refs; + } + + // IClassFactory + IFACEMETHODIMP CreateInstance(IUnknown* punkOuter, REFIID riid, void** ppv) + { + return punkOuter ? 
CLASS_E_NOAGGREGATION : mCreateFunc(riid, ppv); + } + + IFACEMETHODIMP LockServer(BOOL fLock) + { + if (fLock) { + DllAddRef(); + } + else { + DllRelease(); + } + return S_OK; + } + +private: + std::atomic_long mReferences; + PFNCREATEINSTANCE mCreateFunc; +}; + +STDAPI DllGetClassObject(REFCLSID clsid, REFIID riid, void** ppv) +{ + return CClassFactory::CreateInstance(clsid, kClassObjectInit, std::size(kClassObjectInit), riid, ppv); +} + +// A struct to hold the information required for a registry entry +struct REGISTRY_ENTRY { + HKEY hkeyRoot; + PCWSTR pszKeyName; + PCWSTR pszValueName; + PCWSTR pszData; +}; + +// Creates a registry key (if needed) and sets the default value of the key +HRESULT CreateRegKeyAndSetValue(const REGISTRY_ENTRY* pRegistryEntry) +{ + HKEY hKey; + HRESULT hr = HRESULT_FROM_WIN32(RegCreateKeyExW(pRegistryEntry->hkeyRoot, + pRegistryEntry->pszKeyName, + 0, nullptr, REG_OPTION_NON_VOLATILE, + KEY_SET_VALUE | KEY_WOW64_64KEY, + nullptr, &hKey, nullptr)); + if (SUCCEEDED(hr)) { + hr = HRESULT_FROM_WIN32(RegSetValueExW(hKey, pRegistryEntry->pszValueName, 0, REG_SZ, + reinterpret_cast(pRegistryEntry->pszData), + static_cast(wcslen(pRegistryEntry->pszData) + 1) * sizeof(WCHAR))); + RegCloseKey(hKey); + } + return hr; +} + +// Registers this COM server +STDAPI DllRegisterServer() +{ + HRESULT hr; + WCHAR szModuleName[MAX_PATH] = {0}; + + if (!GetModuleFileNameW(gModuleInstance, szModuleName, ARRAYSIZE(szModuleName))) { + hr = HRESULT_FROM_WIN32(GetLastError()); + } + else { + // List of registry entries we want to create + const REGISTRY_ENTRY registryEntries[] = { + // RootKey KeyName ValueName Data + {HKEY_CURRENT_USER, L"Software\\Classes\\CLSID\\" SZ_CLSID_KramTHUMBHANDLER, nullptr, SZ_KramTHUMBHANDLER}, + {HKEY_CURRENT_USER, L"Software\\Classes\\CLSID\\" SZ_CLSID_KramTHUMBHANDLER L"\\InProcServer32", nullptr, szModuleName}, + {HKEY_CURRENT_USER, L"Software\\Classes\\CLSID\\" SZ_CLSID_KramTHUMBHANDLER L"\\InProcServer32", L"ThreadingModel", 
L"Apartment"}, + + // libkram can decode any of these and create a thumbnail + // The Vista GUID for the thumbnail handler Shell extension is E357FCCD-A995-4576-B01F-234630154E96. + {HKEY_CURRENT_USER, L"Software\\Classes\\.ktx", L"PerceivedType", L"image"}, + {HKEY_CURRENT_USER, L"Software\\Classes\\.ktx\\ShellEx\\{e357fccd-a995-4576-b01f-234630154e96}", nullptr, SZ_CLSID_KramTHUMBHANDLER}, + {HKEY_CURRENT_USER, L"Software\\Classes\\.ktx2", L"PerceivedType", L"image"}, + {HKEY_CURRENT_USER, L"Software\\Classes\\.ktx2\\ShellEx\\{e357fccd-a995-4576-b01f-234630154e96}", nullptr, SZ_CLSID_KramTHUMBHANDLER}, + {HKEY_CURRENT_USER, L"Software\\Classes\\.dds", L"PerceivedType", L"image"}, + {HKEY_CURRENT_USER, L"Software\\Classes\\.dds\\ShellEx\\{e357fccd-a995-4576-b01f-234630154e96}", nullptr, SZ_CLSID_KramTHUMBHANDLER}, + //{HKEY_CURRENT_USER, L"Software\\Classes\\.png", L"PerceivedType", L"image"}, + //{HKEY_CURRENT_USER, L"Software\\Classes\\.png\\ShellEx\\{e357fccd-a995-4576-b01f-234630154e96}", nullptr, SZ_CLSID_KramTHUMBHANDLER}, + }; + + hr = S_OK; + for (size_t i = 0; i < std::size(registryEntries) && SUCCEEDED(hr); ++i) { + hr = CreateRegKeyAndSetValue(®istryEntries[i]); + } + } + + if (SUCCEEDED(hr)) { + // This tells the shell to invalidate the thumbnail cache. This is important because any .qoi files + // viewed before registering this handler would otherwise show cached blank thumbnails. 
+ SHChangeNotify(SHCNE_ASSOCCHANGED, SHCNF_IDLIST, nullptr, nullptr); + } + + return hr; +} + +// Unregisters this COM server +STDAPI DllUnregisterServer() +{ + HRESULT hr = S_OK; + + const PCWSTR regKeys[] = { + L"Software\\Classes\\CLSID\\" SZ_CLSID_KramTHUMBHANDLER, + L"Software\\Classes\\.ktx", + L"Software\\Classes\\.ktx2", + L"Software\\Classes\\.dds", + // L"Software\\Classes\\.png", // only need this if Win png bg is bad + }; + + // Delete the registry entries + for (size_t i = 0; i < std::size(regKeys) && SUCCEEDED(hr); ++i) { + hr = HRESULT_FROM_WIN32(RegDeleteTreeW(HKEY_CURRENT_USER, regKeys[i])); + if (hr == HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND)) { + // If the registry entry has already been deleted, say S_OK. + hr = S_OK; + } + } + + return hr; +} diff --git a/kram-thumb-win/Dll.rc b/kram-thumb-win/Dll.rc new file mode 100644 index 00000000..0d950c39 --- /dev/null +++ b/kram-thumb-win/Dll.rc @@ -0,0 +1,38 @@ +#include + +#ifndef DEBUG +#define VER_DEBUG 0 +#else +#define VER_DEBUG VS_FF_DEBUG +#endif + +VS_VERSION_INFO VERSIONINFO +FILEVERSION 0,0,2,0 +PRODUCTVERSION 0,0,2,0 +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEFLAGS VER_DEBUG +FILEOS VOS__WINDOWS32 +FILETYPE VFT_DLL +FILESUBTYPE VFT2_UNKNOWN +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904E4" + BEGIN + VALUE "CompanyName", "ba" + VALUE "FileDescription", "Kram Thumbnail Provider" + VALUE "FileVersion", "0.0.2.0" + VALUE "InternalName", "KramThumbProvider.dll" + VALUE "LegalCopyright", "2023, Alec Miller" + VALUE "LegalTrademarks1", "" + VALUE "LegalTrademarks2", "" + VALUE "OriginalFilename", "KramThumbProvider.dll" + VALUE "ProductName", "KramThumbProvider" + VALUE "ProductVersion", "0, 0, 2, 0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END \ No newline at end of file diff --git a/kram-thumb-win/KramThumbProvider.cpp b/kram-thumb-win/KramThumbProvider.cpp new file mode 100644 index 00000000..e49ccfd5 --- /dev/null +++ 
b/kram-thumb-win/KramThumbProvider.cpp @@ -0,0 +1,365 @@ +#include +#include // For IThumbnailProvider. +#include // For ComPtr + +#include +#include +#include + +#include "KramLib.h" + +using namespace kram; +using namespace std; // or STL_NAMESPACE + +template +using ComPtr = Microsoft::WRL::ComPtr; + +// This thumbnail provider implements IInitializeWithStream to enable being hosted +// in an isolated process for robustness. +// +// This will build to a DLL +// reg: regsrv32.exe KramThumbProvider.dll +// unreg: regsrv32.exe /u KramThumbProvider.dll + +inline void* KLOGF(uint32_t code, const char* format, ...) +{ + string str; + + va_list args; + va_start(args, format); + /* int32_t len = */ append_vsprintf(str, format, args); + va_end(args); + + // log here, so it can see it in Console. But this never appears. + // How are you supposed to debug failures? Resorted to passing a unique code into this call. + // It wasn't originally supposed to generate an NSError + // NSLog(@"%s", str.c_str()); + + // Console prints this as , so what's the point of producing a localizedString ? 
+ // This doesn't seem to work to Console app, but maybe if logs are to terminal + // sudo log config --mode "level:debug" --subsystem com.hialec.kramv + + //NSString* errorText = [NSString stringWithUTF8String:str.c_str()]; + // return [NSError errorWithDomain:@"com.hialec.kramv" code:code userInfo:@{NSLocalizedDescriptionKey : errorText}]; + return nullptr; +} + +struct ImageToPass { + KTXImage image; + KTXImageData imageData; +}; + +class KramThumbProvider final : public IInitializeWithStream, public IThumbnailProvider { +public: + KramThumbProvider() + : mReferences(1), mStream{} + { + } + + virtual ~KramThumbProvider() + { + } + + // IUnknown + IFACEMETHODIMP QueryInterface(REFIID riid, void** ppv) + { + static const QITAB qit[] = { + QITABENT(KramThumbProvider, IInitializeWithStream), + QITABENT(KramThumbProvider, IThumbnailProvider), + {0}, + }; + return QISearch(this, qit, riid, ppv); + } + + IFACEMETHODIMP_(ULONG) + AddRef() + { + return ++mReferences; + } + + IFACEMETHODIMP_(ULONG) + Release() + { + long refs = --mReferences; + if (!refs) { + delete this; + } + return refs; + } + + // IInitializeWithStream + IFACEMETHODIMP Initialize(IStream* pStream, DWORD /*grfMode*/) + { + HRESULT hr = E_UNEXPECTED; // can only be inited once + if (!mStream) { + // take a reference to the stream if we have not been inited yet + hr = pStream->QueryInterface(mStream.ReleaseAndGetAddressOf()); + } + return hr; + } + + // IThumbnailProvider + IFACEMETHODIMP GetThumbnail(UINT cx, HBITMAP* phbmp, WTS_ALPHATYPE* pdwAlpha) + { + // read from stream and create a thumbnail + if (!ImageToHBITMAP(cx, phbmp)) { + return E_OUTOFMEMORY; + } + + // always 4 channels + *pdwAlpha = WTSAT_ARGB; + + return S_OK; + } + +private: + bool ImageToHBITMAP(uint32_t maxSize, HBITMAP* phbmp) + { + if (!mStream) + return false; + + // only know that we have a stream + const char* filename = ""; + + ULARGE_INTEGER streamSizeUint = {}; + IStream_Size(mStream.Get(), &streamSizeUint); + size_t 
streamSize = (size_t)streamSizeUint.QuadPart; + + // TODO: for now read the entire stream in, but eventually test the first 4-6B for type + vector streamData; + streamData.resize(streamSize); + ULONG bytesRead = 0; + HRESULT hr = mStream->Read(streamData.data(), streamSize, &bytesRead); // can only read ULONG + if (FAILED(hr) || streamSize != bytesRead) + return false; + + // https://learn.microsoft.com/en-us/windows/win32/api/thumbcache/nf-thumbcache-ithumbnailprovider-getthumbnail + + std::shared_ptr imageToPass = std::make_shared(); + TexEncoder decoderType = kTexEncoderUnknown; + uint32_t imageWidth, imageHeight; + + { + KTXImage& image = imageToPass->image; + KTXImageData& imageData = imageToPass->imageData; + + if (!imageData.open(streamData.data(), streamData.size(), image)) { + KLOGF(2, "kramv %s could not open file\n", filename); + return false; + } + + // This will set decoder + auto textureType = MyMTLTextureType2D; // image.textureType + if (!validateFormatAndDecoder(textureType, image.pixelFormat, decoderType)) { + KLOGF(3, "format decode only supports ktx and ktx2 output"); + return false; + } + + imageWidth = std::max(1U, image.width); + imageHeight = std::max(1U, image.height); + } + + // This is retina factor + //float requestScale = 1.0; // request.scale; + + // One of the sides must match maximumSize, but can have + // different aspect ratios below that on a given sides. 
+ struct NSSize { + float width; + float height; + }; + NSSize contextSize = {(float)maxSize, (float)maxSize}; + + // compute w/h from aspect ratio of image + float requestWidth, requestHeight; + + float imageAspect = imageWidth / (float)imageHeight; + if (imageAspect >= 1.0f) { + requestWidth = contextSize.width; + requestHeight = std::clamp((contextSize.width / imageAspect), 1.0f, contextSize.height); + } + else { + requestWidth = std::clamp((contextSize.height * imageAspect), 1.0f, contextSize.width); + requestHeight = contextSize.height; + } + + // will be further scaled by requestScale (only on macOS) + contextSize.width = requestWidth; + contextSize.height = requestHeight; + + //----------------- + + KTXImage& image = imageToPass->image; + + bool isPremul = image.isPremul(); + bool isSrgb = isSrgbFormat(image.pixelFormat); + + // unpack a level to get the blocks + uint32_t mipNumber = 0; + uint32_t mipCount = image.mipCount(); + + uint32_t w, h, d; + for (uint32_t i = 0; i < mipCount; ++i) { + image.mipDimensions(i, w, h, d); + if (w > contextSize.width || h > contextSize.height) { + mipNumber++; + } + } + + // clamp to smallest + mipNumber = std::min(mipNumber, mipCount - 1); + image.mipDimensions(mipNumber, w, h, d); + + //----------------- + + uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? 
+ uint32_t numChunks = image.totalChunks(); + + vector mipData; + + // now decode the blocks in that chunk to Color + if (isBlockFormat(image.pixelFormat)) { + // then decode any blocks to rgba8u, not dealing with HDR formats yet + uint64_t mipLength = image.mipLevels[mipNumber].length; + + if (image.isSupercompressed()) { + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + mipData.resize(mipLength * numChunks); + uint8_t* dstData = mipData.data(); + if (!image.unpackLevel(mipNumber, srcData, dstData)) { + // KLOGF("kramv %s failed to unpack mip\n", filename); + return false; + } + + // now extract the chunk for the thumbnail out of that level + if (numChunks > 1) { + macroUnusedVar(chunkNum); + assert(chunkNum == 0); + + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(mipLength); + } + } + else { + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(mipLength); + + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + memcpy(mipData.data(), srcData, mipLength); + } + + KramDecoder decoder; + KramDecoderParams params; + params.decoder = decoderType; + + // TODO: should honor swizzle in the ktx image + // TODO: probaby need an snorm rgba format to convert the snorm versions, so they're not all red + // if sdf, will be signed format and that will stay red + + switch (image.pixelFormat) { + // To avoid showing single channel content in red, replicate to rgb + case MyMTLPixelFormatBC4_RUnorm: + case MyMTLPixelFormatEAC_R11Unorm: + params.swizzleText = "rrr1"; + break; + + default: + break; + } + + vector dstMipData; + + // only space for one chunk for now + dstMipData.resize(h * w * sizeof(Color)); + + // want to just decode one chunk of the level that was unpacked abovve + if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { + // Can't return NSError + // error = KLOGF("kramv %s 
failed to decode blocks\n", filename); + return false; + } + + // copy over original encoded data + mipData = dstMipData; + } + else if (isExplicitFormat(image.pixelFormat)) { + // explicit formats like r/rg/rgb and 16f/32F need to be converted to rgba8 here + // this should currently clamp, but could do range tonemap, see Image::convertToFourChannel() + // but this needs to be slightly different. This will decompress mip again + + Image image2D; + if (!image2D.loadThumbnailFromKTX(image, mipNumber)) { + // KLOGF("kramv %s failed to convert image to 4 channels\n", filename); + return false; + } + + // copy from Color back to uint8_t + uint32_t mipSize = h * w * sizeof(Color); + mipData.resize(mipSize); + memcpy(mipData.data(), image2D.pixels().data(), mipSize); + } + + //--------------------- + + // create a bitmap, and allocate memory for the pixels + BITMAPINFO bmi = {}; + bmi.bmiHeader.biSize = sizeof(bmi.bmiHeader); + bmi.bmiHeader.biWidth = static_cast(w); + bmi.bmiHeader.biHeight = -static_cast(h); // -h to be top-down + bmi.bmiHeader.biPlanes = 1; + bmi.bmiHeader.biBitCount = 32; + bmi.bmiHeader.biCompression = BI_RGB; // TODO: use BI_PNG to shrink thumbnails + + Color* dstPixels = nullptr; + HBITMAP hbmp = CreateDIBSection(nullptr, &bmi, DIB_RGB_COLORS, reinterpret_cast(&dstPixels), nullptr, 0); + if (!hbmp) { + return false; + } + + // TODO: super tiny icons like 2x2 or 4x4 look terrible. Windows says it never upsamples textures + // but a 2x2 thumbnail inside a 32x32 thumbnail isn't visible. Apple does the right thing and upsamples. + + // copy into bgra image (swizzle b and r). 
+ const Color* srcPixels = (const Color*)mipData.data(); + // copy pixels over and swap RGBA -> BGRA + const uint32_t numPixels = w * h; + for (uint32_t i = 0; i < numPixels; ++i) { + // TODO: use uint32_t to do component swizzle + dstPixels[i].r = srcPixels[i].b; + dstPixels[i].g = srcPixels[i].g; + dstPixels[i].b = srcPixels[i].r; + + // setting to 1 for premul is equivalent of blend to opaque black + dstPixels[i].a = 255; + + if (!isPremul) { + uint32_t alpha = srcPixels[i].a; + if (alpha < 255) { + dstPixels[i].r = (dstPixels[i].r * alpha) / 255; + dstPixels[i].g = (dstPixels[i].g * alpha) / 255; + dstPixels[i].b = (dstPixels[i].b * alpha) / 255; + } + } + } + + *phbmp = hbmp; + return true; + } + +private: + std::atomic_long mReferences; + ComPtr mStream; // provided during initialization. +}; + +HRESULT KramThumbProvider_CreateInstance(REFIID riid, void** ppv) +{ + KramThumbProvider* provider = new (std::nothrow) KramThumbProvider(); + HRESULT hr = provider ? S_OK : E_OUTOFMEMORY; + if (SUCCEEDED(hr)) { + hr = provider->QueryInterface(riid, ppv); + provider->Release(); + } + return hr; +} diff --git a/kram-thumb-win/KramThumbProvider.def b/kram-thumb-win/KramThumbProvider.def new file mode 100644 index 00000000..eb96f9fe --- /dev/null +++ b/kram-thumb-win/KramThumbProvider.def @@ -0,0 +1,6 @@ +EXPORTS + DllGetClassObject PRIVATE + DllCanUnloadNow PRIVATE + DllRegisterServer PRIVATE + DllUnregisterServer PRIVATE + DllMain PRIVATE diff --git a/kram-thumb-win/LICENSE b/kram-thumb-win/LICENSE new file mode 100644 index 00000000..1fce8ec0 --- /dev/null +++ b/kram-thumb-win/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 iOrange + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the 
Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/kram-thumb-win/README.md b/kram-thumb-win/README.md new file mode 100644 index 00000000..1144aaea --- /dev/null +++ b/kram-thumb-win/README.md @@ -0,0 +1,31 @@ +# kram-thumb-win.dll + +Windows thumbnailer for DDS/KTX/KTX2 containers in C++. To use the thumbnailer: + +* Go to build or bin folder. +* Install "regsvr32.exe kram-thumb-win.dll". +* Uninstall "regsvr32.exe /u kram-thumb-win.dll" + +# About kram thumbnailer + +The thumbnailer dll runs the same libkram decoders that kramv thumbnailer uses for macOS. An ancient Win7 thumbnil service calls over to the dll. The Microsoft service harkens back to Win7, was last updated in vista, and their sample didn't work off github. So thankfully a dev on github can cleaned all this up. + +A sanitized stream of bytes is supplied by the Explorer thumbnail service to the dll, the dll uses libkram to decode the image container to a single image, and returns the closest mip as a bitmap to the service. The bitmap is assumed to be sRGB, but there are few details or settings. Explorer caches the thumbnails. Windows also seems to generate thumbnails when apps are tied to specific extensions. + +For some reason, Microsoft doesn't upscale small 2x2 thumbnails. 
These show up as barely visible dots despite a request for a 32x32 pixel. macOS does upscale these so they are viewable. + +These are the default thumbnail sizes that are subject to change. Note that Microsoft bases dpi off multiples of 96, where macOS uses 72. + +* 32x32 +* 96x96 +* 256x256 +* 1024x1024 + +Adapted from Microsoft sample code that iOrange cleaned to generate thumbnails for QOI images. + +https://github.com/iOrange/QOIThumbnailProvider + +This code doesn't work and the ability to run this as an exe don't seem to be present. + +https://learn.microsoft.com/en-us/samples/microsoft/windows-classic-samples/recipethumbnailprovider/ + diff --git a/kram-thumb-win/resource.h b/kram-thumb-win/resource.h new file mode 100644 index 00000000..1fb651e3 --- /dev/null +++ b/kram-thumb-win/resource.h @@ -0,0 +1,18 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by Dll.rc +// +#define VER_DEBUG 0 +#define VS_VERSION_INFO 1 +#define IDC_STATIC -1 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 101 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1000 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/kram-thumb/KramThumbnailProvider.h b/kram-thumb/KramThumbnailProvider.h index 0ebbdcd8..3730f045 100644 --- a/kram-thumb/KramThumbnailProvider.h +++ b/kram-thumb/KramThumbnailProvider.h @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index e263cde1..6daf720d 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -1,77 +1,78 @@ -// kram - Copyright 2020-2022 by Alec Miller. 
- MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. #import "KramThumbnailProvider.h" -#include "KramLib.h" +#import // for vImage #import #import -#import // for vImage + +#include "KramLib.h" using namespace kram; -using namespace NAMESPACE_STL; +using namespace STL_NAMESPACE; @implementation KramThumbnailProvider -inline NSError* KLOGF(uint32_t code, const char* format, ...) { +inline NSError* KLOGF(uint32_t code, const char* format, ...) +{ string str; - + va_list args; va_start(args, format); /* int32_t len = */ append_vsprintf(str, format, args); va_end(args); - + // log here, so it can see it in Console. But this never appears. // How are you supposed to debug failures? Resorted to passing a unique code into this call. // It wasn't originally supposed to generate an NSError //NSLog(@"%s", str.c_str()); - + // Console prints this as , so what's the point of producing a localizedString ? 
// This doesn't seem to work to Console app, but maybe if logs are to terminal - // sudo log config --mode "level:debug" --subsystem com.ba.kramv - + // sudo log config --mode "level:debug" --subsystem com.hialec.kramv + NSString* errorText = [NSString stringWithUTF8String:str.c_str()]; - return [NSError errorWithDomain:@"com.ba.kramv" code:code userInfo:@{NSLocalizedDescriptionKey:errorText}]; + return [NSError errorWithDomain:@"com.hialec.kramv" code:code userInfo:@{NSLocalizedDescriptionKey : errorText}]; } -struct ImageToPass -{ +struct ImageToPass { KTXImage image; KTXImageData imageData; }; -- (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request completionHandler:(void (^)(QLThumbnailReply * _Nullable, NSError * _Nullable))handler { +- (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest*)request completionHandler:(void (^)(QLThumbnailReply* _Nullable, NSError* _Nullable))handler +{ + // Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. - // Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. 
- const char* filename = [request.fileURL fileSystemRepresentation]; // DONE: could return NSError to caller if non-null NSError* error = nil; string errorText; - + // TODO: use first x-many bytes also to validate, open will do that if (!isSupportedFilename(filename)) { error = KLOGF(1, "kramv %s only supports ktx,ktx2,dds,png files\n", filename); handler(nil, error); return; } - - shared_ptr imageToPass = make_shared(); + + std::shared_ptr imageToPass = std::make_shared(); TexEncoder decoderType = kTexEncoderUnknown; uint32_t imageWidth, imageHeight; - + { KTXImage& image = imageToPass->image; KTXImageData& imageData = imageToPass->imageData; - + if (!imageData.open(filename, image)) { - error = KLOGF(2, "kramv %s coould not open file\n", filename); + error = KLOGF(2, "kramv %s could not open file\n", filename); handler(nil, error); return; } - + // This will set decoder auto textureType = MyMTLTextureType2D; // image.textureType if (!validateFormatAndDecoder(textureType, image.pixelFormat, decoderType)) { @@ -79,210 +80,206 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet handler(nil, error); return; } - - imageWidth = NAMESPACE_STL::max(1U, image.width); - imageHeight = NAMESPACE_STL::max(1U, image.height); + + imageWidth = STL_NAMESPACE::max(1U, image.width); + imageHeight = STL_NAMESPACE::max(1U, image.height); } // This is retina factor float requestScale = request.scale; - + // One of the sides must match maximumSize, but can have // different aspect ratios below that on a given sides. 
NSSize contextSize = request.maximumSize; - + // compute w/h from aspect ratio of image float requestWidth, requestHeight; - + float imageAspect = imageWidth / (float)imageHeight; - if (imageAspect >= 1.0f) - { + if (imageAspect >= 1.0f) { requestWidth = contextSize.width; - requestHeight = NAMESPACE_STL::clamp((contextSize.width / imageAspect), 1.0, contextSize.height); + requestHeight = std::clamp((contextSize.width / imageAspect), 1.0, contextSize.height); } - else - { - requestWidth = NAMESPACE_STL::clamp((contextSize.height * imageAspect), 1.0, contextSize.width); + else { + requestWidth = std::clamp((contextSize.height * imageAspect), 1.0, contextSize.width); requestHeight = contextSize.height; } - + // will be further scaled by requestScale contextSize = CGSizeMake(requestWidth, requestHeight); - - handler([QLThumbnailReply replyWithContextSize:contextSize drawingBlock:^BOOL(CGContextRef _Nonnull context) - { - KTXImage& image = imageToPass->image; - - bool isPremul = image.isPremul(); - bool isSrgb = isSrgbFormat(image.pixelFormat); - - //----------------- - - // unpack a level to get the blocks - uint32_t mipNumber = 0; - uint32_t mipCount = image.mipCount(); - - uint32_t w, h, d; - for (uint32_t i = 0; i < mipCount; ++i) { - image.mipDimensions(i, w, h, d); - if (w > request.maximumSize.width || h > request.maximumSize.height) { - mipNumber++; - } - } - - // clamp to smallest - mipNumber = std::min(mipNumber, mipCount - 1); - image.mipDimensions(mipNumber, w, h, d); - - //----------------- - - uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? 
- uint32_t numChunks = image.totalChunks(); - - vector mipData; - - // now decode the blocks in that chunk to Color - if (isBlockFormat(image.pixelFormat)) { - - // then decode any blocks to rgba8u, not dealing with HDR formats yet - uint64_t mipLength = image.mipLevels[mipNumber].length; - - if (image.isSupercompressed()) { - const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; - - mipData.resize(mipLength * numChunks); - uint8_t* dstData = mipData.data(); - if (!image.unpackLevel(mipNumber, srcData, dstData)) { - //KLOGF("kramv %s failed to unpack mip\n", filename); - return NO; - } - - // now extract the chunk for the thumbnail out of that level - if (numChunks > 1) { - macroUnusedVar(chunkNum); - assert(chunkNum == 0); - - // this just truncate to chunk 0 instead of copying chunkNum first - mipData.resize(mipLength); - } - } - else - { - // this just truncate to chunk 0 instead of copying chunkNum first - mipData.resize(mipLength); - - const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; - - memcpy(mipData.data(), srcData, mipLength); - } - - KramDecoder decoder; - KramDecoderParams params; - params.decoder = decoderType; - - // TODO: should honor swizzle in the ktx image - // TODO: probaby need an snorm rgba format to convert the snorm versions, so they're not all red - // if sdf, will be signed format and that will stay red - - switch(image.pixelFormat) - { - // To avoid showing single channel content in red, replicate to rgb - case MyMTLPixelFormatBC4_RUnorm: - case MyMTLPixelFormatEAC_R11Unorm: - params.swizzleText = "rrr1"; - break; - - default: - break; - } - - vector dstMipData; - - // only space for one chunk for now - dstMipData.resize(h * w * sizeof(Color)); - - // want to just decode one chunk of the level that was unpacked abovve - if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { - // Can't return NSError - //error = KLOGF("kramv %s 
failed to decode blocks\n", filename); - return NO; - } - - // copy over original encoded data - mipData = dstMipData; - } - else if (isExplicitFormat(image.pixelFormat)) { - // explicit formats like r/rg/rgb and 16f/32F need to be converted to rgba8 here - // this should currently clamp, but could do range tonemap, see Image::convertToFourChannel() - // but this needs to be slightly different. This will decompress mip again - - Image image2D; - if (!image2D.loadThumbnailFromKTX(image, mipNumber)) { - //KLOGF("kramv %s failed to convert image to 4 channels\n", filename); - return NO; - } - - // copy from Color back to uint8_t - uint32_t mipSize = h * w * sizeof(Color); - mipData.resize(mipSize); - memcpy(mipData.data(), image2D.pixels().data(), mipSize); - } - - // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/drawingwithquartz2d/dq_images/dq_images.html#//apple_ref/doc/uid/TP30001066-CH212-TPXREF101 - - uint32_t rowBytes = w * sizeof(Color); - - // use vimage in the Accelerate.framework - // https://developer.apple.com/library/archive/releasenotes/Performance/RN-vecLib/index.html#//apple_ref/doc/uid/TP40001049 - - vImage_Buffer buf = { mipData.data(), h, w, rowBytes }; - - // Declare the pixel format for the vImage_Buffer - vImage_CGImageFormat format = { - .bitsPerComponent = 8, - .bitsPerPixel = 32, - }; - - format.bitmapInfo = kCGBitmapByteOrderDefault | (CGBitmapInfo)(isPremul ? kCGImageAlphaPremultipliedLast : kCGImageAlphaLast); - format.colorSpace = isSrgb ? CGColorSpaceCreateWithName(kCGColorSpaceSRGB) : CGColorSpaceCreateDeviceRGB(); - - // don't need to allocate, can reuse memory from mip - bool skipPixelCopy = true; - - vImage_Error err = 0; - CGImageRef cgImage = vImageCreateCGImageFromBuffer(&buf, &format, NULL, NULL, skipPixelCopy ? 
kvImageNoAllocate : kvImageNoFlags, &err); - if (err) { - // Can't return NSError - //error = KLOGF("kramv %s failed create cgimage\n", filename); - return NO; - } - - CGRect rect = CGRectMake(0, 0, - (uint32_t)roundf(contextSize.width * requestScale), - (uint32_t)roundf(contextSize.height * requestScale)); - - // Default is white, but that messes up all content that uses alpha - // and doesn't match the preview code or kramv background (or Preview). - CGContextSetFillColorWithColor(context, CGColorGetConstantColor(kCGColorBlack)); - CGContextFillRect(context, rect); - - // TODO: should this clear to NSColor clearColor ? - // don't want default white? - - // The image is scaled—disproportionately - - //CGContextSetBlendMode(context, kCGBlendModeCopy); - CGContextSetBlendMode(context, kCGBlendModeNormal); - - CGContextDrawImage(context, rect, cgImage); - - // This seems to cause plugin to fail - // Needed? - if (!skipPixelCopy) - CGImageRelease(cgImage); - - return YES; - }], nil); + + handler([QLThumbnailReply replyWithContextSize:contextSize + drawingBlock:^BOOL(CGContextRef _Nonnull context) { + KTXImage& image = imageToPass->image; + + bool isPremul = image.isPremul(); + bool isSrgb = isSrgbFormat(image.pixelFormat); + + //----------------- + + // unpack a level to get the blocks + uint32_t mipNumber = 0; + uint32_t mipCount = image.mipCount(); + + uint32_t w, h, d; + for (uint32_t i = 0; i < mipCount; ++i) { + image.mipDimensions(i, w, h, d); + if (w > request.maximumSize.width || h > request.maximumSize.height) { + mipNumber++; + } + } + + // clamp to smallest + mipNumber = std::min(mipNumber, mipCount - 1); + image.mipDimensions(mipNumber, w, h, d); + + //----------------- + + uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? 
+ uint32_t numChunks = image.totalChunks(); + + vector mipData; + + // now decode the blocks in that chunk to Color + if (isBlockFormat(image.pixelFormat)) { + // then decode any blocks to rgba8u, not dealing with HDR formats yet + uint64_t mipLength = image.mipLevels[mipNumber].length; + + if (image.isSupercompressed()) { + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + mipData.resize(mipLength * numChunks); + uint8_t* dstData = mipData.data(); + if (!image.unpackLevel(mipNumber, srcData, dstData)) { + //KLOGF("kramv %s failed to unpack mip\n", filename); + return NO; + } + + // now extract the chunk for the thumbnail out of that level + if (numChunks > 1) { + macroUnusedVar(chunkNum); + assert(chunkNum == 0); + + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(mipLength); + } + } + else { + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(mipLength); + + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + memcpy(mipData.data(), srcData, mipLength); + } + + KramDecoder decoder; + KramDecoderParams params; + params.decoder = decoderType; + + // TODO: should honor swizzle in the ktx image + // TODO: probaby need an snorm rgba format to convert the snorm versions, so they're not all red + // if sdf, will be signed format and that will stay red + + switch (image.pixelFormat) { + // To avoid showing single channel content in red, replicate to rgb + case MyMTLPixelFormatBC4_RUnorm: + case MyMTLPixelFormatEAC_R11Unorm: + params.swizzleText = "rrr1"; + break; + + default: + break; + } + + vector dstMipData; + + // only space for one chunk for now + dstMipData.resize(h * w * sizeof(Color)); + + // want to just decode one chunk of the level that was unpacked abovve + if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { + // Can't return NSError + //error = KLOGF("kramv %s failed to 
decode blocks\n", filename); + return NO; + } + + // copy over original encoded data + mipData = dstMipData; + } + else if (isExplicitFormat(image.pixelFormat)) { + // explicit formats like r/rg/rgb and 16f/32F need to be converted to rgba8 here + // this should currently clamp, but could do range tonemap, see Image::convertToFourChannel() + // but this needs to be slightly different. This will decompress mip again + + Image image2D; + if (!image2D.loadThumbnailFromKTX(image, mipNumber)) { + //KLOGF("kramv %s failed to convert image to 4 channels\n", filename); + return NO; + } + + // copy from Color back to uint8_t + uint32_t mipSize = h * w * sizeof(Color); + mipData.resize(mipSize); + memcpy(mipData.data(), image2D.pixels().data(), mipSize); + } + + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/drawingwithquartz2d/dq_images/dq_images.html#//apple_ref/doc/uid/TP30001066-CH212-TPXREF101 + + uint32_t rowBytes = w * sizeof(Color); + + // use vimage in the Accelerate.framework + // https://developer.apple.com/library/archive/releasenotes/Performance/RN-vecLib/index.html#//apple_ref/doc/uid/TP40001049 + + vImage_Buffer buf = {mipData.data(), h, w, rowBytes}; + + // Declare the pixel format for the vImage_Buffer + vImage_CGImageFormat format = { + .bitsPerComponent = 8, + .bitsPerPixel = 32, + }; + + format.bitmapInfo = kCGBitmapByteOrderDefault | (CGBitmapInfo)(isPremul ? kCGImageAlphaPremultipliedLast : kCGImageAlphaLast); + format.colorSpace = isSrgb ? CGColorSpaceCreateWithName(kCGColorSpaceSRGB) : CGColorSpaceCreateDeviceRGB(); + + // don't need to allocate, can reuse memory from mip + bool skipPixelCopy = true; + + vImage_Error err = 0; + CGImageRef cgImage = vImageCreateCGImageFromBuffer(&buf, &format, NULL, NULL, skipPixelCopy ? 
kvImageNoAllocate : kvImageNoFlags, &err); + if (err) { + // Can't return NSError + //error = KLOGF("kramv %s failed create cgimage\n", filename); + return NO; + } + + CGRect rect = CGRectMake(0, 0, + (uint32_t)roundf(contextSize.width * requestScale), + (uint32_t)roundf(contextSize.height * requestScale)); + + // Default is white, but that messes up all content that uses alpha + // and doesn't match the preview code or kramv background (or Preview). + CGContextSetFillColorWithColor(context, CGColorGetConstantColor(kCGColorBlack)); + CGContextFillRect(context, rect); + + // TODO: should this clear to NSColor clearColor ? + // don't want default white? + + // The image is scaled—disproportionately + + //CGContextSetBlendMode(context, kCGBlendModeCopy); + CGContextSetBlendMode(context, kCGBlendModeNormal); + + CGContextDrawImage(context, rect, cgImage); + + // This seems to cause plugin to fail + // Needed? + if (!skipPixelCopy) + CGImageRelease(cgImage); + + return YES; + }], + nil); } @end diff --git a/kram-thumb/kram_thumb.entitlements b/kram-thumb/kram_thumb.entitlements index f2ef3ae0..18aff0ce 100644 --- a/kram-thumb/kram_thumb.entitlements +++ b/kram-thumb/kram_thumb.entitlements @@ -2,9 +2,9 @@ - com.apple.security.app-sandbox - - com.apple.security.files.user-selected.read-only - + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + diff --git a/kramc/CMakeLists.txt b/kramc/CMakeLists.txt index ba3aef5c..bfb1c9e2 100644 --- a/kramc/CMakeLists.txt +++ b/kramc/CMakeLists.txt @@ -1,67 +1,41 @@ -#cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) - -#----------------------------------------------------- - -set(BUILD_IOS FALSE) -set(BUILD_MAC FALSE) -set(BUILD_WIN FALSE) -set(BUILD_UNIX FALSE) - -if (APPLE) - if (CMAKE_SYSTEM_NAME STREQUAL "iOS") - message("build for iOS") - set(BUILD_IOS TRUE) - else() - message("build for macOS") - set(BUILD_MAC TRUE) - endif() -elseif (WIN32) - message("build for win x64") - set(BUILD_WIN 
TRUE) -elseif (UNIX AND NOT APPLE) - message("build for unix") - set(BUILD_UNIX TRUE) -endif() - #----------------------------------------------------- # kram # now setup the app project, and link to libkram set(myTargetApp kram) - -# not using version in cmake anymore, this is pulled in by KramVersion.h -if (BUILD_MAC) - project(${myTargetApp} LANGUAGES C CXX OBJCXX) -elseif (BUILD_WIN) - project(${myTargetApp} LANGUAGES C CXX) -endif() - -# **** this the executable target ****, for a CLI App +project(${myTargetApp}) add_executable(${myTargetApp}) #----------------------------------------------------- +if (BUILD_LINUX) + # Enable all warnings, and also enable f16c sims op (only x64 though) + target_compile_options(${myTargetApp} PRIVATE -W -Wall + -mavx2 -mfma -mf16c + -fno-exceptions -fno-rtti + -fdata-sections -ffunction-sections + ) + + # librt is for clock_gettime + target_link_libraries(${myTargetApp} PUBLIC + libkram + rt) + +endif() + if (BUILD_MAC) # ate is a macOS/iOS only library, and it varies in encode support by OS revision - target_link_libraries(${myTargetApp} + target_link_libraries(${myTargetApp} PUBLIC ate - libkram) + libkram + ) set_target_properties(${myTargetApp} PROPERTIES - # Note: match this up with CXX version - # c++11 min - XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" - XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" - - # avx1 - XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" + #XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++20" + #XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" - # turn off exceptions/rtti - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO - # can't believe this isn't on by default in CMAKE - XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + #XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES #------------------------- @@ -69,65 +43,78 @@ if (BUILD_MAC) XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym" XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO" + #------------------------- + # app specific 
settings + # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES - XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" + #XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + + # This is LTO + #XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- # for now disable signing, and just "sign to run locally" - XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.ba.kram" + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.hialec.kram" XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO" XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "" ) - target_compile_options(${myTargetApp} PRIVATE -W -Wall) + target_compile_options(${myTargetApp} PRIVATE -W -Wall + -mavx2 -mfma -mf16c + -fno-exceptions -fno-rtti + -fdata-sections -ffunction-sections + ) + + # only turn on in Release in case this disables incremental linking + if (CMAKE_BUILD_TYPE EQUAL "Release") + add_link_options(${myTargetApp}, "-dead_strip") + endif() +endif() -elseif (BUILD_WIN) - target_link_libraries(${myTargetApp} libkram) +#----------------------------------------------------- + +if (BUILD_WIN) + + # Use the static linked libs, or the exe needs the VCRuntimeDLL installed + set_property(TARGET ${myTargetApp} PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" + ) + + target_link_libraries(${myTargetApp} PUBLIC libkram) # When Win rebuilds library, it doesn't relink app to correct code when you # build the app target project. Breakpoints stop working after any library source edit, - # and you have to rebuild solution to get the correct code to exectute. Since 2014. Try this. + # and you have to rebuild solution to get the correct code to execute. Since 2014. Try this. # And BUILD_ALL never launches properly. 
# https://cmake.org/pipermail/cmake/2014-October/058798.html SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}) SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}) - # TODO: switch to add_target_definitions - - # to turn off exceptions/rtti use /GR and /EHsc replacement - string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - # don't need force with apps, since they only access kram folder files which include KramConfig - # all warnings, AVX1, and multiprocess compiles - target_compile_options(${myTargetApp} PRIVATE /W3 /arch:AVX /MP /GF /FC) + # all warnings, AVX2, and multiprocess compiles + target_compile_options(${myTargetApp} PRIVATE /W3 + -march=haswell -mf16c -mfma + /GR- /EHs-c- + /MP /GF /FC + ) # fix STL - target_compile_definitions(${myTargetApp} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") - - if (CMAKE_BUILD_TYPE EQUAL "Debug") - target_compile_definitions(${myTargetLib} PRIVATE "/INCREMENTAL") - - elseif (CMAKE_BUILD_TYPE EQUAL "Release") - # only dead strip on Release builds since this disables Incremental linking, may want Profile build that doesn't use this - target_compile_definitions(${myTargetLib} PRIVATE "/OPT:REF") + target_compile_definitions(${myTargetApp} PRIVATE -D_ITERATOR_DEBUG_LEVEL=0 -D_HAS_EXCEPTIONS=0) + + if (CMAKE_BUILD_TYPE EQUAL "Release") + # This disables Incremental linking, so only set on Release + add_link_options(${myTargetApp} PRIVATE "/OPT:REF") # other possibliities # /GL - whole program optimization - # /Gy - edit and continue with function level linking + # /Gy - edit and continue with function level linking (unsupported for clang) # /Oi - enable intrinsic functions endif() - -elseif (BUILD_UNIX) - target_link_libraries(${myTargetApp} libkram) - - # TODO: finish this - + endif() diff --git a/kramc/KramMain.cpp b/kramc/KramMain.cpp index e449bf16..2bf1f999 100644 --- a/kramc/KramMain.cpp +++ b/kramc/KramMain.cpp @@ 
-1,13 +1,366 @@ #include "KramLib.h" +#if KRAM_MAC +#include +#endif + +#if KRAM_WIN +#include +#endif + +using namespace STL_NAMESPACE; + +// These aren't avx2 specific, but just don't want unused func warning +#if SIMD_AVX2 +#if KRAM_MAC + +inline const char* getMacOSVersion() { + static char str[256] = {}; + if (str[0] == 0) { + size_t size = sizeof(str); + if (sysctlbyname("kern.osproductversion", str, &size, NULL, 0) == 0) { + return str; + } + } + return str; +} + +inline bool isRunningUnderRosetta() { + int ret = 0; + size_t size = sizeof(ret); + if (sysctlbyname("sysctl.proc_translated", &ret, &size, NULL, 0) == -1) { + if (errno == ENOENT) { + // sysctl doesn't exist - not running under Rosetta + return false; + } + // Other error occurred + return false; + } + return ret > 0; +} + + +inline uint32_t getMacOSMajorVersion() { + // 15.4 + static uint32_t majorOSVersion = 0; + if (majorOSVersion == 0) { + sscanf(getMacOSVersion(), "%u", &majorOSVersion); + } + return majorOSVersion; +} + +#endif +#endif + +// TODO: move this into vectormath +void checkSimdSupport() +{ + // Check for AVX2, FMA, F16C support on Intel. + // Still need to set compile flags, and arm64 emulators are also spotty. + // arm64 native has everything needed. No holes to check, or legacy simd. + +#if SIMD_AVX2 +#if KRAM_MAC + // Apple added AVX2 and F16C? support to Rosetta in macOS 15 with no way + // to detect it. Really awesome, so skip the test. There are + // no supporting Intel hw devices on macOS 15 that don't have AVX2. 
+ // const char* macOSVersion = getMacOSVersion(); + // KLOGI("kram", "%s", macOSVersion); + uint32_t majorOSVersion = getMacOSMajorVersion(); + if (majorOSVersion >= 15) { + return; + } + + bool hasSimdSupport = true; + + vector cpuName; + size_t cpuNameSize = 0; + + const char* cpuNameProp = "machdep.cpu.brand_string"; + + if (sysctlbyname(cpuNameProp, nullptr, &cpuNameSize, nullptr, 0) >= 0) { + cpuName.resize(cpuNameSize); + + // Assuming this is ascii + sysctlbyname(cpuNameProp, cpuName.data(), &cpuNameSize, nullptr, 0); + } + + + // can also check AVX1.0 + // F16C (avx/avx2 imply F16C and assume Rosetta too) + + // https://csharpmulticore.blogspot.com/2014/12/how-to-check-intel-avx2-support-on-mac-os-x-haswell.html + // machdep.cpu.features: FPU VME DE PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV PAT PSE36 CLFSH DS ACPI MMX FXSR SSE SSE2 SS HTT TM PBE SSE3 PCLMULQDQ DTES64 MON DSCPL VMX EST TM2 SSSE3 FMA CX16 TPR PDCM SSE4.1 SSE4.2 x2APIC MOVBE POPCNT AES PCID XSAVE OSXSAVE SEGLIM64 TSCTMR AVX1.0 RDRAND F16C + // machdep.cpu.leaf7_features: SMEP ERMS RDWRFSGS TSC_THREAD_OFFSET BMI1 AVX2 BMI2 INVPCID + const char* missingFeatures[4] = { "", "", "", "" }; + uint32_t missingFeaturesCount = 0; + + const char* leaf7Features = "machdep.cpu.leaf7_features"; + + size_t leaf7FeatureSize = 0; + sysctlbyname(leaf7Features, nullptr, &leaf7FeatureSize, nullptr, 0); + + vector bufferLeaf7; + + if (leaf7FeatureSize == 0) { + hasSimdSupport = false; + } + else { + bufferLeaf7.resize(leaf7FeatureSize); + + // TODO: check failure + sysctlbyname(leaf7Features, bufferLeaf7.data(), &leaf7FeatureSize, nullptr, 0); + } + + const char* cpuFeatures = "machdep.cpu.features"; + + size_t cpuFeatureSize = 0; + sysctlbyname(cpuFeatures, nullptr, &cpuFeatureSize, nullptr, 0); + + vector bufferFeatures; + + if (!hasSimdSupport || cpuFeatureSize == 0) { + hasSimdSupport = false; + } + else { + bufferFeatures.resize(cpuFeatureSize); + + // TODO: check failure + sysctlbyname(cpuFeatures, 
bufferFeatures.data(), &cpuFeatureSize, nullptr, 0); + } + + const char* features = !bufferFeatures.empty() ? bufferFeatures.data() : ""; + const char* features7 = !bufferLeaf7.empty() ? bufferLeaf7.data() : ""; + + // If don't find avx2, then support is not present. + // could be running under Rosetta2 but it's supposed to add AVX2 soon. + bool hasAVX2 = strstr(features7, "AVX2") != nullptr; + + if (!hasAVX2) { + missingFeatures[missingFeaturesCount++] = "AVX2 "; + hasSimdSupport = false; + } + + // Make sure compile has enabled these on AVX2. + // Rosetta2 and Prism often don't emulate these. + // (f.e. BMI and F16C) + + bool hasAVX = strstr(features, "AVX") != nullptr; + bool hasF16C = strstr(features, "F16C") != nullptr; + bool hasFMA = strstr(features, "FMA") != nullptr; + + if (!hasAVX) { + missingFeatures[missingFeaturesCount++] = "AVX "; + hasSimdSupport = false; + } + if (!hasF16C) { + missingFeatures[missingFeaturesCount++] = "F16C "; + hasSimdSupport = false; + } + if (!hasFMA) { + missingFeatures[missingFeaturesCount++] = "FMA "; + hasSimdSupport = false; + } + + if (!hasSimdSupport) { + bool isEmulated = isRunningUnderRosetta() && (majorOSVersion < 15); + const char* emulatedHint = isEmulated ? " install macOS 15.0+" : ""; + + KLOGE("Main", "Missing simd support for %s%s%s%son %s%s", + missingFeatures[0], missingFeatures[1], missingFeatures[2], missingFeatures[3], + cpuName.data(), emulatedHint); + exit(1); + } + +#elif KRAM_WIN + bool hasSimdSupport = true; + + // Also handles Win for ARM (f.e. Prism is SSE4 -> AVX2 soon). + // See here for more bits (f.e. 
AVX512) + // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex?view=msvc-170 + + // f1.ecx bit 0 is sse3 + // f1.ecx bit 12 is fma + // f1.ecx bit 19 is sse4.1 + // f1.ecx bit 20 is sse4.2 + // f1.ecx bit 28 is avx + // f1.ecx bit 29 is f16c (docs are wrong about this being avx2) + + // f7.ebx bit 5 is avx2 + // f7.ebx bit 16 is avx-512f + // f7.ebx bit 26 is avx-512pf + // f7.ebx bit 27 is avx-512er + // f7.ebx bit 28 is avx-512cd + + // This returns a count of the ids from the docs. + struct CpuInfo { + int eax, ebx, ecx, edx; + }; + + // numIds in 0 + // vendorId (12 char string) returned in 1,3,2 + // can tell intel from amd off vendorId + CpuInfo cpuInfo = {}; + __cpuid((int*)&cpuInfo, 0); + + // This is GenuineIntel or AuthenticAMD + char vendorId[12+1] = {}; + *reinterpret_cast(vendorId + 0) = cpuInfo.ebx; + *reinterpret_cast(vendorId + 4) = cpuInfo.edx; + *reinterpret_cast(vendorId + 8) = cpuInfo.ecx; + + const char* missingFeatures[4] = { "", "", "", "" }; + uint32_t missingFeaturesCount = 0; + + int numIds = cpuInfo.eax; + if (numIds < 7) { + hasSimdSupport = false; + } + else { + // +1 since 0 is the count and vendorId + vector cpuInfoByIndex; + cpuInfoByIndex.resize(numIds+1); + + // This has sse4, avx, f16c + __cpuidex((int*)&cpuInfo, 1, 0); + cpuInfoByIndex[1] = cpuInfo; + + // This has AVX2, avx512 + __cpuidex((int*)&cpuInfo, 7, 0); + cpuInfoByIndex[7] = cpuInfo; + + bool hasAVX2 = cpuInfoByIndex[7].ebx & (1 << 5); + + bool hasFMA = cpuInfoByIndex[1].ecx & (1 << 12); + bool hasAVX = cpuInfoByIndex[1].ecx & (1 << 28); + bool hasF16C = cpuInfoByIndex[1].ecx & (1 << 29); + + if (!hasAVX2) { + missingFeatures[missingFeaturesCount++] = "AVX2 "; + hasSimdSupport = false; + } + if (!hasAVX) { + missingFeatures[missingFeaturesCount++] = "AVX "; + hasSimdSupport = false; + } + if (!hasFMA) { + missingFeatures[missingFeaturesCount++] = "FMA "; + hasSimdSupport = false; + } + if (!hasF16C) { + missingFeatures[missingFeaturesCount++] = "F16C "; +
hasSimdSupport = false; + } + } + + // extended cpuid attributes + int extBase = 0x80000000; + __cpuid((int*)&cpuInfo, extBase); + numIds = cpuInfo.eax - extBase; + + char brandId[48+1] = {}; + + if (numIds >= 4) + { + vector cpuInfoByIndex; + cpuInfoByIndex.resize(numIds+1); + + // f81 + __cpuidex((int*)&cpuInfo, extBase+1, 0); + cpuInfoByIndex[1] = cpuInfo; + + // brand + __cpuidex((int*)&cpuInfo, extBase+2, 0); + cpuInfoByIndex[2] = cpuInfo; + __cpuidex((int*)&cpuInfo, extBase+3, 0); + cpuInfoByIndex[3] = cpuInfo; + __cpuidex((int*)&cpuInfo, extBase+4, 0); + cpuInfoByIndex[4] = cpuInfo; + + memcpy(brandId + 0, &cpuInfoByIndex[2], sizeof(CpuInfo)); + memcpy(brandId + 16, &cpuInfoByIndex[3], sizeof(CpuInfo)); + memcpy(brandId + 32, &cpuInfoByIndex[4], sizeof(CpuInfo)); + } + + if (!hasSimdSupport) { + KLOGE("Main", "Missing simd support for %s%s%s%son %s", + missingFeatures[0], missingFeatures[1], missingFeatures[2], missingFeatures[3], + brandId); + exit(1); + } + +#elif KRAM_LINUX // || KRAM_MAC + + // This should apply to all clang and gcc builds. So may want + // to use on all platforms. + + // Common CPU features that can be checked with __builtin_cpu_supports include: + // sse, sse2, sse3, ssse3, sse4.1, sse4.2 + // avx, avx2, avx512f + // fma + // bmi, bmi2 + // popcnt + // lzcnt + // mmx + + + bool hasSimdSupport = true; + + bool hasAVX2 = __builtin_cpu_supports("avx2"); + + bool hasFMA = __builtin_cpu_supports("fma"); + bool hasAVX = __builtin_cpu_supports("avx"); + + // macOS doesn't support f16c as string? 
+ #if KRAM_MAC + bool hasF16C = true; // a lie + #else + bool hasF16C = __builtin_cpu_supports("f16c"); + #endif + + const char* missingFeatures[4] = { "", "", "", "" }; + uint32_t missingFeaturesCount = 0; + + if (!hasAVX2) { + missingFeatures[missingFeaturesCount++] = "AVX2 "; + hasSimdSupport = false; + } + if (!hasAVX) { + missingFeatures[missingFeaturesCount++] = "AVX "; + hasSimdSupport = false; + } + if (!hasFMA) { + missingFeatures[missingFeaturesCount++] = "FMA "; + hasSimdSupport = false; + } + if (!hasF16C) { + missingFeatures[missingFeaturesCount++] = "F16C "; + hasSimdSupport = false; + } + + if (!hasSimdSupport) { + KLOGE("Main", "Missing simd support for %s%s%s%s", + missingFeatures[0], missingFeatures[1], missingFeatures[2], missingFeatures[3]); + exit(1); + } + +#endif +#endif +} + int main(int argc, char* argv[]) { - int errorCode = kram::kramAppMain(argc, argv); + // This will exit if insufficient simd support on x64. + // arm64+neon has full support of all operations. + checkSimdSupport(); + // verify that machine has simd support to run + int errorCode = kram::kramAppMain(argc, argv); + // returning -1 from main results in exit code of 255, so fix this to return 1 on failure. 
if (errorCode != 0) { exit(1); } - + return 0; } diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Contents.json b/kramv/Assets.xcassets/AppIcon.appiconset/Contents.json index 6f4231ab..7a43aac9 100644 --- a/kramv/Assets.xcassets/AppIcon.appiconset/Contents.json +++ b/kramv/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -6,7 +6,7 @@ "size" : "16x16" }, { - "filename" : "Icon-33.png", + "filename" : "Icon-32 1.png", "idiom" : "mac", "scale" : "2x", "size" : "16x16" @@ -30,7 +30,7 @@ "size" : "128x128" }, { - "filename" : "Icon-257.png", + "filename" : "Icon-256 1.png", "idiom" : "mac", "scale" : "2x", "size" : "128x128" @@ -42,13 +42,13 @@ "size" : "256x256" }, { - "filename" : "Icon-512.png", + "filename" : "Icon-512 1.png", "idiom" : "mac", "scale" : "2x", "size" : "256x256" }, { - "filename" : "Icon-513.png", + "filename" : "Icon-512.png", "idiom" : "mac", "scale" : "1x", "size" : "512x512" diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-1024.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-1024.png index 93907fb6..758e37ff 100644 Binary files a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-1024.png and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-1024.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-128.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-128.png index edb6e551..b8f2f369 100644 Binary files a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-128.png and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-128.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png new file mode 100644 index 00000000..569e8c5e Binary files /dev/null and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256 1.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256.png index 5b502cba..569e8c5e 100644 Binary files a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256.png and 
b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-256.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-257.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-257.png deleted file mode 100644 index 5b502cba..00000000 --- a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-257.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:41e563b7dd1c783bb57e4edd6987c04b16d1a7f0dd1bd6f91dbc0acbb51dedf2 -size 28931 diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png new file mode 100644 index 00000000..20407b3d --- /dev/null +++ b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32 1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f22c4dc90c013e14aedb31e25c5480dcfd5a49b7d7a2607fdadb3bdcf0bf602 +size 1498 diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32.png index 88ae8432..67c03303 100644 Binary files a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32.png and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-32.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-33.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-33.png deleted file mode 100644 index 88ae8432..00000000 --- a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-33.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:208893acd43392c4b2d975b924439f8ec1e29c867918664938ca637173a78ee4 -size 1223 diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png new file mode 100644 index 00000000..0906482d Binary files /dev/null and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512 1.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512.png index a6bb07a1..0906482d 100644 Binary files 
a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512.png and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-512.png differ diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-513.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-513.png deleted file mode 100644 index a6bb07a1..00000000 --- a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-513.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad4eb49081bd50fc5656cbbcd1901854778992c52bd00045b523b49beea27a10 -size 87511 diff --git a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-64.png b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-64.png index a4b0c880..9b5b99b0 100644 Binary files a/kramv/Assets.xcassets/AppIcon.appiconset/Icon-64.png and b/kramv/Assets.xcassets/AppIcon.appiconset/Icon-64.png differ diff --git a/kramv/Base.lproj/Main.storyboard b/kramv/Base.lproj/Main.storyboard index 1d6caae7..891c23e1 100644 --- a/kramv/Base.lproj/Main.storyboard +++ b/kramv/Base.lproj/Main.storyboard @@ -1,8 +1,8 @@ - + - + @@ -174,7 +174,7 @@ - + @@ -204,7 +204,7 @@ - + diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 97d844bf..1e76eab6 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -39,11 +39,11 @@ target_link_libraries(${myTargetApp} set_target_properties(${myTargetApp} PROPERTIES # Note: match this up with CXX version # c++11 min - XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" + XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++20" XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" # avx1 - XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" + XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx2" # turn off exceptions/rtti XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO diff --git a/kramv/Info.plist b/kramv/Info.plist index ce247db3..80291404 100644 --- a/kramv/Info.plist +++ b/kramv/Info.plist @@ -129,7 +129,7 @@ Default LSItemContentTypes - image/dds + com.microsoft.dds NSDocumentClass KramDocument @@ -149,6 +149,19 @@ $(PRODUCT_BUNDLE_PACKAGE_TYPE) 
CFBundleShortVersionString 1.0 + CFBundleURLTypes + + + CFBundleTypeRole + Viewer + CFBundleURLName + com.hialec.kramv + CFBundleURLSchemes + + kram + + + CFBundleVersion 1 LSApplicationCategoryType diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 63991707..3b56d4eb 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. @@ -16,7 +16,7 @@ namespace kram { class KTXImage; class KTXImageData; -} +} //namespace kram // This loads KTX/2 and PNG data. Moving towards KTX/2 files only, with a PNG // to KTX/2 conversion. @@ -38,7 +38,7 @@ class KTXImageData; - (nullable id)loadTextureFromImage:(const kram::KTXImage &)image originalFormat: (nullable MTLPixelFormat *)originalFormat - name:(nonnull const char*)name; + name:(nonnull const char *)name; // load into KTXImage and KTXImageData, can use with loadTextureFromImage - (BOOL)loadImageFromURL:(nonnull NSURL *)url @@ -69,8 +69,8 @@ class KTXImageData; //#include namespace kram { -using namespace NAMESPACE_STL; +using namespace STL_NAMESPACE; // provide access to lowercase strings string toLower(const string &text); -} +} //namespace kram diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 97a08efe..b9b33426 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
@@ -11,8 +11,8 @@ #include "KramLib.h" using namespace kram; -using namespace NAMESPACE_STL; -using namespace simd; +using namespace STL_NAMESPACE; +using namespace SIMD_NAMESPACE; using mymutex = std::recursive_mutex; using mylock = std::unique_lock; @@ -45,12 +45,12 @@ @implementation KramLoader { // only one of these for now id _buffer; - uint8_t* _data; + uint8_t *_data; uint32_t _bufferOffset; vector _blits; - NSMutableArray>* _blitTextures; - NSMutableArray>* _mipgenTextures; + NSMutableArray> *_blitTextures; + NSMutableArray> *_mipgenTextures; } - (instancetype)init @@ -72,25 +72,24 @@ - (instancetype)init originalFormat:originalFormat]; } -// for macOS/win Intel need to decode astc/etc -// on macOS/arm, the M1 supports all 3 encode formats -#define DO_DECODE KRAM_SSE - -#if DO_DECODE - // this means format isnt supported on platform, but can be decoded to rgba to // display -bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) +bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat, MyMTLTextureType type) { bool needsDecode = false; +#if SIMD_SSE if (isETCFormat(pixelFormat)) { needsDecode = true; } else if (isASTCFormat(pixelFormat)) { needsDecode = true; } - +#else + if (isETCFormat(pixelFormat) && type == MyMTLTextureType3D) { + needsDecode = true; + } +#endif return needsDecode; } @@ -99,6 +98,8 @@ bool decodeImage(const KTXImage &image, KTXImage &imageDecoded) KramDecoderParams decoderParams; KramDecoder decoder; + // macOS Intel only had BC support, and already have macOS arm64 build +#if SIMD_SSE if (isETCFormat(image.pixelFormat)) { if (!decoder.decode(image, imageDecoded, decoderParams)) { return NO; @@ -109,8 +110,15 @@ bool decodeImage(const KTXImage &image, KTXImage &imageDecoded) return NO; } } +#else + if (isETCFormat(image.pixelFormat) && image.textureType == MyMTLTextureType3D) { + if (!decoder.decode(image, imageDecoded, decoderParams)) { + return NO; + } + } +#endif else { - assert(false); // don't call this routine if decode not needed + 
KASSERT(false); // don't call this routine if decode not needed } // TODO: decode BC format on iOS when not supported, but viewer only on macOS @@ -119,8 +127,6 @@ bool decodeImage(const KTXImage &image, KTXImage &imageDecoded) return YES; } -#endif - #if SUPPORT_RGB // TODO: move these into libkram @@ -172,27 +178,23 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) KTXImage image; if (imageDataLength > 3 && - imageData[0] == 0xff && imageData[1] == 0xd8 && imageData[2] == 0xff ) - { + imageData[0] == 0xff && imageData[1] == 0xd8 && imageData[2] == 0xff) { KLOGE("kramv", "loader does not support jpg files"); return nil; } - + // if png, then need to load from KTXImageData which uses loadpng // \x89, P, N, G if (imageDataLength > 4 && - imageData[0] == 137 && imageData[1] == 'P' && imageData[2] == 'N' && imageData[3] == 'G') - { + imageData[0] == 137 && imageData[1] == 'P' && imageData[2] == 'N' && imageData[3] == 'G') { KTXImageData imageDataReader; if (!imageDataReader.open(imageData, imageDataLength, image)) { return nil; } - + return [self loadTextureFromImage:image originalFormat:originalFormat name:""]; } - else - { - + else { // isInfoOnly = true keeps compressed mips on KTX2 and aliases original mip // data but have decode etc2/astc path below that uncompressed mips and the // rgb conversion path below as well in the viewer. 
games would want to @@ -211,7 +213,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) - (nullable id)loadTextureFromImage:(const KTXImage &)image originalFormat: (nullable MTLPixelFormat *)originalFormat - name:(const char*)name + name:(const char *)name { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { @@ -228,7 +230,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) dstImageInfoArgs.textureType = image.textureType; dstImageInfoArgs.pixelFormat = remapInternalRGBFormat(image.pixelFormat); dstImageInfoArgs.doMipmaps = - image.mipCount() > 1; // ignore 0 + image.mipCount() > 1; // ignore 0 dstImageInfoArgs.textureEncoder = kTexEncoderExplicit; // set chunk count, so it's explicit @@ -249,7 +251,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)rbgaImage2 - .pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? + .pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? 
} return [self blitTextureFromImage:rbgaImage2 name:name]; @@ -260,8 +262,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) *originalFormat = (MTLPixelFormat)image.pixelFormat; } -#if DO_DECODE - if (isDecodeImageNeeded(image.pixelFormat)) { + if (isDecodeImageNeeded(image.pixelFormat, image.textureType)) { KTXImage imageDecoded; if (!decodeImage(image, imageDecoded)) { return nil; @@ -269,19 +270,17 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) return [self blitTextureFromImage:imageDecoded name:name]; } - else -#endif - { + else { // fast load path directly from mmap'ed data, decompress direct to staging return [self blitTextureFromImage:image name:name]; } } - (BOOL)loadImageFromURL:(nonnull NSURL *)url - image:(KTXImage&)image - imageData:(KTXImageData&)imageData + image:(KTXImage &)image + imageData:(KTXImageData &)imageData { - const char* path = url.absoluteURL.path.UTF8String; + const char *path = url.absoluteURL.path.UTF8String; if (!imageData.open(path, image)) { return NO; } @@ -289,9 +288,9 @@ - (BOOL)loadImageFromURL:(nonnull NSURL *)url return YES; } -- (nullable id)loadTextureFromURL:(nonnull NSURL*)url +- (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat: - (nullable MTLPixelFormat*)originalFormat + (nullable MTLPixelFormat *)originalFormat { KTXImage image; KTXImageData imageData; @@ -306,7 +305,7 @@ - (BOOL)loadImageFromURL:(nonnull NSURL *)url - (nullable id)createTexture:(const KTXImage &)image isPrivate:(bool)isPrivate { - MTLTextureDescriptor* textureDescriptor = [[MTLTextureDescriptor alloc] init]; + MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; // Indicate that each pixel has a blue, green, red, and alpha channel, where // each channel is an 8-bit unsigned normalized value (i.e. 
0 maps to 0.0 and @@ -321,6 +320,13 @@ - (BOOL)loadImageFromURL:(nonnull NSURL *)url textureDescriptor.arrayLength = MAX(1, image.arrayCount()); + // This is inefficient to set, but needed for viewer. + // Only set if texture type is toggleable. + // only need this if changing components, type, etc. + // { + // textureDescriptor.usage |= MTLTextureUsagePixelFormatView; + // } + // ignoring 0 (auto mip), but might need to support for explicit formats // must have hw filtering support for format, and 32f filtering only first // appeared on A14/M1 and only get box filtering in API-level filters. But @@ -351,7 +357,7 @@ - (void)createStagingBufffer:(uint64_t)dataSize // enough to upload 4k x 4k @ 4 bytes no mips, careful with array and cube // that get too big - // allocate system memory for bufffer, can memcopy to this + // allocate system memory for buffer, can memcpy to this posix_memalign((void **)&_data, getpagesize(), dataSize); // allocate memory for circular staging buffer, only need to memcpy to this @@ -372,13 +378,13 @@ - (void)uploadTexturesIfNeeded:(id)blitEncoder commandBuffer:(id)commandBuffer { mylock lock(gTextureLock); - + if (!_blits.empty()) { // now upload from staging MTLBuffer to private MTLTexture - for (const auto& blit : _blits) { + for (const auto &blit : _blits) { MTLRegion region = { - {0, 0, 0}, // MTLOrigin - {(NSUInteger)blit.w, (NSUInteger)blit.h, 1} // MTLSize + {0, 0, 0}, // MTLOrigin + {(NSUInteger)blit.w, (NSUInteger)blit.h, 1} // MTLSize }; uint32_t chunkNum = blit.chunkNum; @@ -387,7 +393,7 @@ - (void)uploadTexturesIfNeeded:(id)blitEncoder chunkNum = 0; } - // assert(blit.textureIndex < _blitTextures.count); + // KASSERT(blit.textureIndex < _blitTextures.count); id texture = _blitTextures[blit.textureIndex]; [blitEncoder copyFromBuffer:_buffer @@ -417,8 +423,8 @@ - (void)uploadTexturesIfNeeded:(id)blitEncoder self->_bufferOffset = 0; }]; } - - // mipgen after possible initial blit above + + // mipgen possible after initial blit
above if (_mipgenTextures.count > 0) { for (id texture in _mipgenTextures) { // autogen mips will include srgb conversions, so toggling srgb on/off @@ -426,7 +432,7 @@ - (void)uploadTexturesIfNeeded:(id)blitEncoder [blitEncoder generateMipmapsForTexture:texture]; } - // reset the arra + // reset the array [_mipgenTextures removeAllObjects]; } } @@ -434,11 +440,11 @@ - (void)uploadTexturesIfNeeded:(id)blitEncoder - (void)releaseAllPendingTextures { mylock lock(gTextureLock); - + _bufferOffset = 0; - + _blits.clear(); - + [_mipgenTextures removeAllObjects]; [_blitTextures removeAllObjects]; } @@ -452,14 +458,14 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) // (f.e. ktx), and another path for private that uses a blitEncoder and must // have block aligned data (f.e. ktxa, ktx2). Could repack ktx data into ktxa // before writing to temporary file, or when copying NSData into MTLBuffer. -- (nullable id)blitTextureFromImage:(const KTXImage &)image name:(const char*)name +- (nullable id)blitTextureFromImage:(const KTXImage &)image name:(const char *)name { mylock lock(gTextureLock); - + if (_buffer == nil) { // Was set to 128, but models like FlightHelmet.gltf exceeded that buffer static const size_t kStagingBufferSize = 256 * 1024 * 1024; - + // this is enough to upload 4k x 4x @ RGBA8u with mips, 8k x 8k compressed // with mips @96MB [self createStagingBufffer:kStagingBufferSize]; @@ -475,10 +481,10 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) id texture = [self createTexture:image isPrivate:true]; if (!texture) return nil; - + // set a label so can identify in captures texture.label = [NSString stringWithUTF8String:name]; - + // this is index where texture will be added uint32_t textureIndex = (uint32_t)_blitTextures.count; @@ -486,7 +492,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) // upload mip levels // TODO: about aligning to 4k for base + length - // 
http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html + // http://metalkit.org/working-with-memory-in-metal/ uint32_t w = image.width; uint32_t h = image.height; @@ -504,8 +510,8 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) size_t blockSize = image.blockSize(); vector bufferOffsets; - uint8_t* bufferData = (uint8_t*)_buffer.contents; - const uint8_t* mipData = (const uint8_t*)image.fileData; + uint8_t *bufferData = (uint8_t *)_buffer.contents; + const uint8_t *mipData = (const uint8_t *)image.fileData; bufferOffsets.resize(image.mipLevels.size()); uint32_t numChunks = image.totalChunks(); @@ -523,8 +529,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) KLOGE("kramv", "Ran out of buffer space to upload images"); return nil; } - - + // this may have to decompress the level data if (!image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset)) { @@ -557,7 +562,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 - if ( // image.textureType != MyMTLTextureType1D && + if ( // image.textureType != MyMTLTextureType1D && image.textureType != MyMTLTextureType1DArray) { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height @@ -605,7 +610,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) mipLevelNumber, mipStorageSize, mipOffset, textureIndex, bytesPerRow, - is3D // could derive from textureIndex lookup + is3D // could derive from textureIndex lookup }); } } diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index a413b715..adc72336 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. 
- MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. @@ -8,12 +8,16 @@ #import #include "KramLib.h" -#import "KramShaders.h" // for TextureChannels +#import "KramShaders.h" // for TextureChannels // Turn on GLTF loading support for 3d models. This relies on Warren Moore's first GLTFKit // which only handles import and synchronous loading. #define USE_GLTF 1 +// Could use ModelIO on macOS/iOS to support usd files. Adding here for completeness. +// But would prefer cross-platform C++ solution. +#define USE_USD 0 + // Only use a perspective transform for models/images, otherwise perspective only used for models #define USE_PERSPECTIVE 0 @@ -24,11 +28,16 @@ //@import GLTFMTL; #endif - namespace kram { class ShowSettings; +class Data; class KTXImage; -} +} //namespace kram + +// Need renderer to be able to call back up to view to update hud. +@protocol MyMTKViewUpdates +- (void)updateEyedropperText; +@end // Our platform independent renderer class. 
Implements the MTKViewDelegate // protocol which @@ -41,37 +50,45 @@ class KTXImage; - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view settings: - (nonnull kram::ShowSettings *)settings; + (nonnull kram::ShowSettings *)settings + data:(nonnull kram::Data *)data; - (BOOL)loadTextureFromImage:(nonnull const char *)fullFilenameString timestamp:(double)timestamp image:(kram::KTXImage &)image imageNormal:(nullable kram::KTXImage *)imageNormal + imageDiff:(nullable kram::KTXImage *)imageDiff isArchive:(BOOL)isArchive; - (BOOL)loadTexture:(nonnull NSURL *)url; -- (simd::float4x4)computeImageTransform:(float)panX - panY:(float)panY - zoom:(float)zoom; - - (BOOL)hotloadShaders:(nonnull const char *)filename; - // unload textures and gltf model textures - (void)releaseAllPendingTextures; // load a gltf model -- (BOOL)loadModel:(nonnull NSURL*)url; +- (BOOL)loadModel:(nonnull const char *)url; // unload gltf model - (void)unloadModel; // called from view and renderer in render loop -- (void)updateAnimationState:(nonnull MTKView*)view; +- (void)updateAnimationState:(nonnull MTKView *)view; + +// So caller can respond to completed callback +- (void)setEyedropperDelegate:(nullable id)delegate; + +// Can enable frame pacing for VRR +- (void)setFramePacingEnabled:(bool)enable; // can play animations in gltf models -@property (nonatomic) BOOL playAnimations; +@property(nonatomic) BOOL playAnimations; -@end +// can toggle on/off srgb if that is possible +@property(nonatomic) BOOL isToggleView; +// true if a toggle is present +@property(nonatomic) BOOL hasToggleView; + +@end diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 40cbd404..c5985f94 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software.
@@ -16,12 +16,15 @@ // c interface to signposts similar to dtrace on macOS/iOS #include + #include // for recursive_mutex -os_log_t gLogKramv = os_log_create("com.ba.kramv", ""); +using mymutex = std::recursive_mutex; +using mylock = std::unique_lock; + +os_log_t gLogKramv = os_log_create("com.hialec.kramv", ""); -class Signpost -{ +class Signpost { public: Signpost(const char* name) : _name(name), _ended(false) @@ -31,12 +34,12 @@ else _ended = true; } - + ~Signpost() { stop(); } - + void stop() { if (!_ended) { @@ -44,18 +47,14 @@ void stop() _ended = true; } } - + private: const char* _name; bool _ended; }; - #if USE_GLTF -using mymutex = std::recursive_mutex; -using mylock = std::unique_lock; - // TODO: make part of renderer static mymutex gModelLock; @@ -64,12 +63,12 @@ void stop() @interface KramGLTFTextureLoader : NSObject - (instancetype)initWithLoader:(KramLoader*)loader; -- (id _Nullable)newTextureWithContentsOfURL:(NSURL *)url options:(NSDictionary * _Nullable)options error:(NSError **)error; -- (id _Nullable)newTextureWithData:(NSData *)data options:(NSDictionary * _Nullable)options error:(NSError **)error; +- (id _Nullable)newTextureWithContentsOfURL:(NSURL*)url options:(NSDictionary* _Nullable)options error:(NSError**)error; +- (id _Nullable)newTextureWithData:(NSData*)data options:(NSDictionary* _Nullable)options error:(NSError**)error; @end @interface KramGLTFTextureLoader () -@property (nonatomic, strong) KramLoader* loader; +@property(nonatomic, strong) KramLoader* loader; @end @implementation KramGLTFTextureLoader @@ -83,13 +82,13 @@ - (instancetype)initWithLoader:(KramLoader*)loader } // TODO: this ignores options and error. Default png loading may need to request srgb. 
-- (id _Nullable)newTextureWithContentsOfURL:(NSURL *)url options:(NSDictionary * _Nullable)options error:(NSError * __autoreleasing *)error +- (id _Nullable)newTextureWithContentsOfURL:(NSURL*)url options:(NSDictionary* _Nullable)options error:(NSError* __autoreleasing*)error { return [_loader loadTextureFromURL:url originalFormat:nil]; } // TODO: this ignores options and error. Default png loading may need to request srgb. -- (id _Nullable)newTextureWithData:(NSData *)data options:(NSDictionary * _Nullable)options error:(NSError * __autoreleasing *)error +- (id _Nullable)newTextureWithData:(NSData*)data options:(NSDictionary* _Nullable)options error:(NSError* __autoreleasing*)error { return [_loader loadTextureFromData:data originalFormat:nil]; } @@ -98,11 +97,10 @@ - (instancetype)initWithLoader:(KramLoader*)loader #endif - static const NSUInteger MaxBuffersInFlight = 3; using namespace kram; -using namespace simd; +using namespace SIMD_NAMESPACE; // Capture what we need to build the renderPieplines, without needing view struct ViewFramebufferData { @@ -125,6 +123,8 @@ @implementation Renderer { id _pipelineStateCubeArray; id _pipelineStateVolume; + id _pipelineStateDrawLines; + id _pipelineState1DArrayCS; id _pipelineStateImageCS; id _pipelineStateImageArrayCS; @@ -139,7 +139,10 @@ @implementation Renderer { // TODO: Array< id > _textures; id _colorMap; + id _colorMapView; id _normalMap; + id _diffMap; + id _lastDrawableTexture; // border is a better edge sample, but at edges it filters in the transparent @@ -162,21 +165,11 @@ @implementation Renderer { uint8_t _uniformBufferIndex; - float4x4 _projectionMatrix; - - // 2d versions - float4x4 _viewMatrix; - float4x4 _modelMatrix; - - // 3d versions - float4x4 _viewMatrix3D; - float4x4 _modelMatrix3D; - // float _rotation; KramLoader* _loader; MTKMesh* _mesh; - MDLVertexDescriptor *_mdlVertexDescriptor; + MDLVertexDescriptor* _mdlVertexDescriptor; MTKMesh* _meshRect; MTKMesh* _meshBox; @@ -184,7 +177,7 @@ 
@implementation Renderer { MTKMesh* _meshSphereMirrored; // MTKMesh* _meshCylinder; MTKMesh* _meshCapsule; - MTKMeshBufferAllocator *_metalAllocator; + MTKMeshBufferAllocator* _metalAllocator; id _shaderLibrary; NSURL* _metallibFileURL; @@ -192,33 +185,46 @@ @implementation Renderer { ViewFramebufferData _viewFramebuffer; ShowSettings* _showSettings; - + Data* _data; + #if USE_GLTF KramGLTFTextureLoader* _textureLoader; id _bufferAllocator; GLTFMTLRenderer* _gltfRenderer; GLTFAsset* _asset; // only 1 for now double _animationTime; - + id _environmentTexture; bool _environmentNeedsUpdate; - + NSURLSession* _urlSession; #endif + __weak id _delegateHud; + + bool _useFramePacing; + double _avgGpuTime; } @synthesize playAnimations; +@synthesize isToggleView; +@synthesize hasToggleView; -- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view - settings:(nonnull ShowSettings *)settings +- (nonnull instancetype)initWithMetalKitView:(nonnull MTKView*)view + settings:(nonnull ShowSettings*)settings + data:(nonnull Data*)data { self = [super init]; if (self) { _showSettings = settings; - + _data = data; _device = view.device; + // 11gb on a 16gb machine. + // Represents the max size of a render encoder. 
+ double kInvOneMB = 1.0 / (1024.0 * 1024.0); + KLOGI("Rendererr", "%0.3f mb", _device.recommendedMaxWorkingSetSize * kInvOneMB ); + _loader = [KramLoader new]; _loader.device = _device; @@ -227,25 +233,24 @@ - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view _inFlightSemaphore = dispatch_semaphore_create(MaxBuffersInFlight); [self _loadMetalWithView:view]; [self _loadAssets]; - + #if USE_GLTF _bufferAllocator = [[GLTFMTLBufferAllocator alloc] initWithDevice:_device]; _gltfRenderer = [[GLTFMTLRenderer alloc] initWithDevice:_device]; - + // This aliases the existing kram loader, can handle png, ktx, ktx2 _textureLoader = [[KramGLTFTextureLoader alloc] initWithLoader:_loader]; _gltfRenderer.textureLoader = _textureLoader; - + // load the environment from a cube map for now // runs this after _shaderLibrary established above - _gltfRenderer.lightingEnvironment = [[GLTFMTLLightingEnvironment alloc] initWithLibrary: _shaderLibrary]; - + _gltfRenderer.lightingEnvironment = [[GLTFMTLLightingEnvironment alloc] initWithLibrary:_shaderLibrary]; + //NSURL* environmentURL = [[NSBundle mainBundle] URLForResource:@"piazza_san_marco" withExtension:@"ktx"]; NSURL* environmentURL = [[NSBundle mainBundle] URLForResource:@"tropical_beach" withExtension:@"ktx"]; _environmentTexture = [_loader loadTextureFromURL:environmentURL originalFormat:nil]; _environmentNeedsUpdate = true; #endif - } return self; @@ -253,7 +258,7 @@ - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view - (void)_createSamplers { - MTLSamplerDescriptor *samplerDescriptor = [MTLSamplerDescriptor new]; + MTLSamplerDescriptor* samplerDescriptor = [MTLSamplerDescriptor new]; samplerDescriptor.minFilter = MTLSamplerMinMagFilterNearest; samplerDescriptor.magFilter = MTLSamplerMinMagFilterNearest; samplerDescriptor.mipFilter = MTLSamplerMipFilterNearest; @@ -277,7 +282,7 @@ - (void)_createSamplers samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToEdge; 
samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToEdge; samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToEdge; - samplerDescriptor.label = @"colorMapSamplerNearsetEdge"; + samplerDescriptor.label = @"colorMapSamplerNearestEdge"; _colorMapSamplerNearestEdge = [_device newSamplerStateWithDescriptor:samplerDescriptor]; @@ -289,7 +294,7 @@ - (void)_createSamplers samplerDescriptor.minFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.magFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.mipFilter = MTLSamplerMipFilterLinear; - samplerDescriptor.maxAnisotropy = 4; // 1,2,4,8,16 are choices + samplerDescriptor.maxAnisotropy = 4; // 1,2,4,8,16 are choices samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; @@ -327,19 +332,19 @@ - (void)_createVertexDescriptor BufferIndexMeshPosition; _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].format = - MTLVertexFormatFloat2; // TODO: compress + MTLVertexFormatFloat2; // TODO: compress _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].offset = 0; _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].bufferIndex = BufferIndexMeshUV0; _mtlVertexDescriptor.attributes[VertexAttributeNormal].format = - MTLVertexFormatFloat3; // TODO: compress + MTLVertexFormatFloat3; // TODO: compress _mtlVertexDescriptor.attributes[VertexAttributeNormal].offset = 0; _mtlVertexDescriptor.attributes[VertexAttributeNormal].bufferIndex = BufferIndexMeshNormal; _mtlVertexDescriptor.attributes[VertexAttributeTangent].format = - MTLVertexFormatFloat4; // TODO: compress + MTLVertexFormatFloat4; // TODO: compress _mtlVertexDescriptor.attributes[VertexAttributeTangent].offset = 0; _mtlVertexDescriptor.attributes[VertexAttributeTangent].bufferIndex = BufferIndexMeshTangent; @@ -368,19 +373,114 @@ - (void)_createVertexDescriptor MDLVertexAttributeTangent; } -- (void)_loadMetalWithView:(nonnull MTKView *)view +- 
(void)_loadMetalWithView:(nonnull MTKView*)view { /// Load Metal state objects and initialize renderer dependent view properties // Important to set color space, or colors are wrong. Why doesn't one of these work (or the default) // false is good for srgb -> rgba16f // true is good for non-srgb -> rgba16f - CGColorSpaceRef viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGBLinear); - //bool pickOne = false; - // pickOne ? kCGColorSpaceSRGB : kCGColorSpaceLinearSRGB); + CGColorSpaceRef viewColorSpace; + MTLPixelFormat format = MTLPixelFormatRGBA16Float; + + // This doesn't look like Figma or Photoshop for a rgb,a = 255,0 to 255,1 gradient across a 256px wide rect. The shader is saturating + // the color to 0,1. So can get away with SRGB color space for now. + // This also lines up with Preview. + //viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceGenericRGBLinear); + + //CAMetalLayer* metalLayer = (CAMetalLayer*)[view layer]; + + // was using 16f so could sample hdr images from it + // and also so hdr data went out to the display + uint32_t colorSpaceChoice = 1; + switch (colorSpaceChoice) { + default: + case 0: + // This is best so far + format = MTLPixelFormatRGBA16Float; + viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB); + //viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceLinearSRGB); + break; + + case 1: { + // Display P3 is a standard made by Apple that covers the same colour space as DCI-P3, but uses the more neutral D65 as a white point instead of the green white of the DCI standard. + // Ideally feed 16-bit color to P3. 
+ + // This also works + // 25% larger than srgb + format = MTLPixelFormatRGBA16Float; + + // This is industry format + // viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceDCIP3); + + // This isn't edr + // viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceDisplayP3); + + // Use this because it exists from 10.14.3+ + viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceExtendedLinearDisplayP3); + + // don't set this yet. + // metalLayer.wantsExtendedDynamicRangeContent = YES; + + // https://developer.apple.com/videos/play/wwdc2021/10161/ + + /* Can detect if on HDR display or not + user can mod the brightness, or move to another monitor, + need to listen for notification when this changes. + + NSScreen* screen = NSScreen.mainScreen; + + // This reports 1 + CGFloat val1 = screen.maximumExtendedDynamicRangeColorComponentValue; + + // This is 16 + CGFloat maxPot = screen.maximumPotentialExtendedDynamicRangeColorComponentValue; + + // This is 0 + CGFloat maxRef = screen.maximumReferenceExtendedDynamicRangeColorComponentValue; + */ + + // M1 monitor + + break; + } + case 2: + // This doesn't match wnen srgb is turned off on TestColorGradient + format = MTLPixelFormatRGBA8Unorm_sRGB; + viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceSRGB); + + // this looks totally wrong + //viewColorSpace = CGColorSpaceCreateWithName(kCGColorLinearSpaceSRGB); + break; + + /* + case 3: { + // There is an exrMetadata field on NSView to set as well. + // https://developer.apple.com/documentation/metal/hdr_content/using_color_spaces_to_display_hdr_content?language=objc + + // Rec2020 color primaries, with PQ Transfer function. 
+ // Would have to get into Rec2020 colors to set this, also go from 10bit + format = MTLPixelFormatBGR10A2Unorm; + viewColorSpace = CGColorSpaceCreateWithName(kCGColorSpaceITUR_2100_PQ); + + metalLayer.wantsExtendedDynamicRangeContent = YES; + + // https://developer.apple.com/documentation/metal/hdr_content/using_system_tone_mapping_on_video_content?language=objc + // must do os version check on this + // 1.0 is 100 nits of output + CAEDRMetadata* edrMetaData = [CAEDRMetadata HDR10MetadataWithMinLuminance: 0.005 maxLuminance: 1000 opticalOutputScale: 100]; + metalLayer.EDRMetadata = edrMetaData; + + break; + } + */ + } + + view.colorPixelFormat = format; view.colorspace = viewColorSpace; - - view.colorPixelFormat = MTLPixelFormatRGBA16Float; + + CGColorSpaceRelease(viewColorSpace); + view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; view.sampleCount = 1; @@ -398,17 +498,13 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view //----------------------- - MTLDepthStencilDescriptor *depthStateDesc = + MTLDepthStencilDescriptor* depthStateDesc = [[MTLDepthStencilDescriptor alloc] init]; - depthStateDesc.depthCompareFunction = _showSettings->isReverseZ - ? MTLCompareFunctionGreaterEqual - : MTLCompareFunctionLessEqual; + depthStateDesc.depthCompareFunction = MTLCompareFunctionGreaterEqual; depthStateDesc.depthWriteEnabled = YES; _depthStateFull = [_device newDepthStencilStateWithDescriptor:depthStateDesc]; - depthStateDesc.depthCompareFunction = _showSettings->isReverseZ - ? 
MTLCompareFunctionGreaterEqual - : MTLCompareFunctionLessEqual; + depthStateDesc.depthCompareFunction = MTLCompareFunctionGreaterEqual; depthStateDesc.depthWriteEnabled = NO; _depthStateNone = [_device newDepthStencilStateWithDescriptor:depthStateDesc]; @@ -431,7 +527,7 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view [self _createSampleRender]; } -- (BOOL)hotloadShaders:(const char *)filename +- (BOOL)hotloadShaders:(const char*)filename { _metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; @@ -450,7 +546,7 @@ - (BOOL)hotloadShaders:(const char *)filename _metallibFileDate = fileDate; // Now dynamically load the metallib - NSData *dataNS = [NSData dataWithContentsOfURL:_metallibFileURL + NSData* dataNS = [NSData dataWithContentsOfURL:_metallibFileURL options:NSDataReadingMappedIfSafe error:&err]; if (dataNS == nil) { @@ -476,7 +572,7 @@ - (BOOL)hotloadShaders:(const char *)filename return YES; } -- (id)_createComputePipeline:(const char *)name +- (id)_createComputePipeline:(const char*)name { NSString* nameNS = [NSString stringWithUTF8String:name]; NSError* error = nil; @@ -511,8 +607,8 @@ - (void)_createComputePipelines [self _createComputePipeline:"SampleImage1DArrayCS"]; } -- (id)_createRenderPipeline:(const char *)vs - fs:(const char *)fs +- (id)_createRenderPipeline:(const char*)vs + fs:(const char*)fs { NSString* vsNameNS = [NSString stringWithUTF8String:vs]; NSString* fsNameNS = [NSString stringWithUTF8String:fs]; @@ -523,14 +619,14 @@ - (void)_createComputePipelines MTLRenderPipelineDescriptor* pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; pipelineStateDescriptor.label = fsNameNS; - pipelineStateDescriptor.sampleCount = _viewFramebuffer.sampleCount; + pipelineStateDescriptor.rasterSampleCount = _viewFramebuffer.sampleCount; pipelineStateDescriptor.vertexDescriptor = _mtlVertexDescriptor; pipelineStateDescriptor.colorAttachments[0].pixelFormat = _viewFramebuffer.colorPixelFormat; // Note: 
blending is disabled on color target, all blending done in shader // since have checkerboard and other stuff to blend against. - + // TODO: could drop these for images, but want a 3D preview of content // or might make these memoryless. pipelineStateDescriptor.depthAttachmentPixelFormat = @@ -581,6 +677,9 @@ - (void)_createRenderPipelines fs:"DrawCubeArrayPS"]; _pipelineStateVolume = [self _createRenderPipeline:"DrawVolumeVS" fs:"DrawVolumePS"]; + + _pipelineStateDrawLines = [self _createRenderPipeline:"DrawLinesVS" + fs:"DrawLinesPS"]; } - (void)_createSampleRender @@ -614,9 +713,9 @@ - (void)_createSampleRender } } -- (MTKMesh *)_createMeshAsset:(const char *)name - mdlMesh:(MDLMesh *)mdlMesh - doFlipUV:(bool)doFlipUV +- (MTKMesh*)_createMeshAsset:(const char*)name + mdlMesh:(MDLMesh*)mdlMesh + doFlipUV:(bool)doFlipUV { NSError* error = nil; @@ -630,10 +729,10 @@ - (MTKMesh *)_createMeshAsset:(const char *)name id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; MDLMeshBufferMap* uvsMap = [uvs map]; - packed_float2* uvData = (packed_float2 *)uvsMap.bytes; + packed_float2* uvData = (packed_float2*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - auto &uv = uvData[i]; + auto& uv = uvData[i]; uv.x = 1.0f - uv.x; } @@ -651,13 +750,13 @@ - (MTKMesh *)_createMeshAsset:(const char *)name if (doFlipBitangent) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; MDLMeshBufferMap* uvsMap = [uvs map]; - packed_float4* uvData = (packed_float4 *)uvsMap.bytes; + packed_float4* uvData = (packed_float4*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - // if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { - // int bp = 0; - // bp = bp; - // } + // if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { + // int bp = 0; + // bp = bp; + // } uvData[i].w = -uvData[i].w; } @@ -711,12 +810,11 @@ - (MTKMesh *)_createMeshAsset:(const char *)name float x, y, z; }; - - (void)releaseAllPendingTextures { @autoreleasepool { [_loader 
releaseAllPendingTextures]; - + // also release the model and cached textures in the renderer [self unloadModel]; } @@ -725,54 +823,62 @@ - (void)releaseAllPendingTextures - (void)updateAnimationState:(nonnull MTKView*)view { bool animateDisplay = self.playAnimations; - - // animate the uvPreviw until it reaches endPoint, no scrubber yet + + // animate the uvPreview until it reaches endPoint, no scrubber yet _showSettings->updateUVPreviewState(); - + if (_showSettings->uvPreviewFrames > 0) { _showSettings->uvPreviewFrames--; animateDisplay = true; } - + view.enableSetNeedsDisplay = !animateDisplay; view.paused = !animateDisplay; } +// So caller can respond to completed callback +- (void)setEyedropperDelegate:(nullable id)delegate +{ + _delegateHud = delegate; +} -- (void)updateModelSettings:(const string &)fullFilename +- (void)updateModelSettings:(const string&)fullFilename { _showSettings->isModel = true; _showSettings->numChannels = 0; // hides rgba - + // don't want any scale on view, or as little as possible _showSettings->imageBoundsX = 1; _showSettings->imageBoundsY = 1; - + BOOL isNewFile = YES; [self resetSomeImageSettings:isNewFile]; } -- (BOOL)loadModel:(nonnull NSURL*)url +- (BOOL)loadModel:(nonnull const char*)filename { + NSURL* fileURL = + [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; + #if USE_GLTF - // TODO: move to async version of this, many of these load slow - // but is there a way to cancel the load. Or else move to cgltf which is faster. - // see GLTFKit2. + // TODO: move to async version of this, many of these load slow + // but is there a way to cancel the load. Or else move to cgltf which is faster. + // see GLTFKit2. 
#define DO_ASYNC 0 #if DO_ASYNC - [GLTFAsset loadAssetWithURL:url bufferAllocator:_bufferAllocator delegate:self]; + // [GLTFAsset loadAssetWithURL:url bufferAllocator:_bufferAllocator delegate:self]; #else @autoreleasepool { - GLTFAsset* newAsset = [[GLTFAsset alloc] initWithURL:url bufferAllocator:_bufferAllocator]; + GLTFAsset* newAsset = [[GLTFAsset alloc] initWithURL:fileURL bufferAllocator:_bufferAllocator]; if (!newAsset) { return NO; } // tie into delegate callback - [self assetWithURL:url didFinishLoading:newAsset]; + [self assetWithURL:fileURL didFinishLoading:newAsset]; } #endif @@ -792,83 +898,13 @@ - (void)unloadModel #endif } -// TODO: remove this -//- (void)updateProjTransform -//{ -// // float aspect = size.width / (float)size.height; -// //_projectionMatrix = perspective_rhs(45.0f * (M_PI / 180.0f), aspect, 0.1f, -// //100.0f); -// _projectionMatrix = -// orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, -// 100000.0f, _showSettings->isReverseZ); -// -// // DONE: adjust zoom to fit the entire image to the window -// _showSettings->zoomFit = -// MIN((float)_showSettings->viewSizeX, (float)_showSettings->viewSizeY) / -// MAX(1, MAX((float)_showSettings->imageBoundsX, -// (float)_showSettings->imageBoundsY)); -// -// // already using drawableSize which includes scale -// // TODO: remove contentScaleFactor of view, this can be 1.0 to 2.0f -// // why does this always report 2x even when I change monitor res. -// //_showSettings->zoomFit /= _showSettings->viewContentScaleFactor; -//} - -- (void)updateProjTransform -{ - // Want to move to always using perspective even for 2d images, but still more math - // to work out to keep zoom to cursor working. -#if USE_PERSPECTIVE - float aspect = _showSettings->viewSizeX / (float)_showSettings->viewSizeY; - _projectionMatrix = perspective_rhs(90.0f * (M_PI / 180.0f), aspect, 0.1f, 100000.0f, _showSettings->isReverseZ); - - // This was used to reset zoom to a baseline that had a nice zoom. 
But little connected to it now. - // Remember with rotation, the bounds can hit the nearClip. Note all shapes are 0.5 radius, - // so at 1 this is 2x to leave gap around the shape for now. - float shapeHeightInY = 1; - _showSettings->zoomFit = shapeHeightInY; // / (float)_showSettings->viewSizeY; - -#else - - if (_showSettings->isModel) { - float aspect = _showSettings->viewSizeX / (float)_showSettings->viewSizeY; - _projectionMatrix = perspective_rhs(90.0f * (M_PI / 180.0f), aspect, 0.1f, 100000.0f, _showSettings->isReverseZ); - - _showSettings->zoomFit = 1; - } - else { - _projectionMatrix = - orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, - 100000.0f, _showSettings->isReverseZ); - - // DONE: adjust zoom to fit the entire image to the window - _showSettings->zoomFit = - MIN((float)_showSettings->viewSizeX, (float)_showSettings->viewSizeY) / - MAX(1, MAX((float)_showSettings->imageBoundsX, - (float)_showSettings->imageBoundsY)); - - static bool useImageAndViewBounds = true; - if (useImageAndViewBounds) { - float invWidth = 1.0f / MAX(1.0f, (float)_showSettings->imageBoundsX); - float invHeight = 1.0f / MAX(1.0f, (float)_showSettings->imageBoundsY); - - // DONE: adjust zoom to fit the entire image to the window - // the best fit depends on dimension of image and window - _showSettings->zoomFit = - MIN( (float)_showSettings->viewSizeX * invWidth, - (float)_showSettings->viewSizeY * invHeight); - } - } -#endif -} - - (void)_createMeshRect:(float)aspectRatioXToY { // This is a box that's smashed down to a thin 2d z plane, can get very close to it // due to the thinness of the volume without nearZ intersect - + /// Load assets into metal objects - MDLMesh *mdlMesh; + MDLMesh* mdlMesh; mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){aspectRatioXToY, 1, 0.001} segments:(vector_uint3){1, 1, 1} @@ -878,7 +914,7 @@ - (void)_createMeshRect:(float)aspectRatioXToY // for some reason normals are all n = 1,0,0 which doesn't make sense on a box 
// for the side that is being viewed. - + // only one of these for now, but really should store per image _meshRect = [self _createMeshAsset:"MeshRect" mdlMesh:mdlMesh doFlipUV:false]; } @@ -923,19 +959,19 @@ - (void)_loadAssets id posBuffer = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; MDLMeshBufferMap* posMap = [posBuffer map]; - packed_float3* posData = (packed_float3 *)posMap.bytes; + packed_float3* posData = (packed_float3*)posMap.bytes; id normalBuffer = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; MDLMeshBufferMap* normalsMap = [normalBuffer map]; - packed_float3* normalData = (packed_float3 *)normalsMap.bytes; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage - uint32_t numVertices = 289; // mdlMesh.vertexCount + uint32_t numVertices = 289; // mdlMesh.vertexCount for (uint32_t i = 0; i < numVertices; ++i) { { - auto &pos = posData[i]; + auto& pos = posData[i]; // dumb rotate about Y-axis auto copy = pos; @@ -945,7 +981,7 @@ - (void)_loadAssets } { - auto &normal = normalData[i]; + auto& normal = normalData[i]; auto copy = normal; normal.x = copy.x * cosSin.x - copy.z * cosSin.y; normal.z = copy.x * cosSin.y + copy.z * cosSin.x; @@ -954,7 +990,7 @@ - (void)_loadAssets // Hack - knock out all bogus tris from ModelIO that lead to garbage tris for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { - auto &pos = posData[i]; + auto& pos = posData[i]; pos.x = NAN; } } @@ -981,26 +1017,26 @@ - (void)_loadAssets id uvsBuffer = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; MDLMeshBufferMap* uvsMap = [uvsBuffer map]; - packed_float2* uvData = (packed_float2 *)uvsMap.bytes; + packed_float2* uvData = (packed_float2*)uvsMap.bytes; // this is all aos id posBuffer = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; MDLMeshBufferMap* posMap = [posBuffer map]; - packed_float3 *posData = (packed_float3 *)posMap.bytes; + packed_float3* posData = (packed_float3*)posMap.bytes; id normalsBuffe = 
mdlMesh.vertexBuffers[BufferIndexMeshNormal]; MDLMeshBufferMap* normalsMap = [normalsBuffe map]; - packed_float3* normalData = (packed_float3 *)normalsMap.bytes; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage - uint32_t numVertices = 289; // mdlMesh.vertexCount + uint32_t numVertices = 289; // mdlMesh.vertexCount for (uint32_t i = 0; i < numVertices; ++i) { { - auto &pos = posData[i]; + auto& pos = posData[i]; // dumb rotate about Y-axis auto copy = pos; @@ -1009,18 +1045,18 @@ - (void)_loadAssets } { - auto &normal = normalData[i]; + auto& normal = normalData[i]; auto copy = normal; normal.x = copy.x * cosSin.x - copy.z * cosSin.y; normal.z = copy.x * cosSin.y + copy.z * cosSin.x; } - auto &uv = uvData[i]; + auto& uv = uvData[i]; - // if (uv.x < 0.0 || uv.x > 1.0) { - // int bp = 0; - // bp = bp; - // } + // if (uv.x < 0.0 || uv.x > 1.0) { + // int bp = 0; + // bp = bp; + // } // this makes it counterclockwise 0 to 1 float x = uv.x; @@ -1044,7 +1080,7 @@ - (void)_loadAssets // Hack - knock out all bogus tris from ModelIO that lead to garbage tris for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { - auto &pos = posData[i]; + auto& pos = posData[i]; pos.x = NAN; } @@ -1071,7 +1107,7 @@ - (void)_loadAssets // doFlipUV:true]; mdlMesh = [MDLMesh newCapsuleWithHeight:1.0 - radii:(vector_float2){1.0f/3.0f, 1.0f/3.0f} // circle + radii:(vector_float2){1.0f / 3.0f, 1.0f / 3.0f} // circle // vertical cap subtracted from height radialSegments:16 verticalSegments:1 @@ -1089,7 +1125,8 @@ - (void)_loadAssets } // this aliases the existing string, so can't chop extension -inline const char* toFilenameShort(const char* filename) { +inline const char* toFilenameShort(const char* filename) +{ const char* filenameShort = strrchr(filename, '/'); if (filenameShort == nullptr) { filenameShort = filename; @@ -1100,22 +1137,20 @@ - (void)_loadAssets return filenameShort; } - -- 
(BOOL)loadTextureFromImage:(nonnull const char *)fullFilenameString +- (BOOL)loadTextureFromImage:(nonnull const char*)fullFilenameString timestamp:(double)timestamp - image:(kram::KTXImage &)image - imageNormal:(kram::KTXImage *)imageNormal + image:(kram::KTXImage&)image + imageNormal:(nullable kram::KTXImage*)imageNormal + imageDiff:(nullable kram::KTXImage*)imageDiff isArchive:(BOOL)isArchive { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format string fullFilename = fullFilenameString; const char* filenameShort = toFilenameShort(fullFilename.c_str()); - - // Note that modstamp can change, but content data hash may be the same - bool isNewFile = (fullFilename != _showSettings->lastFilename); - bool isTextureChanged = - isNewFile || (timestamp != _showSettings->lastTimestamp); + + bool isTextureNew = _showSettings->isFileNew(fullFilename.c_str()); + bool isTextureChanged = _showSettings->isFileChanged(fullFilename.c_str(), timestamp); if (isTextureChanged) { // synchronously cpu upload from ktx file to buffer, with eventual gpu blit @@ -1131,6 +1166,26 @@ - (BOOL)loadTextureFromImage:(nonnull const char *)fullFilenameString return NO; } + bool isPNG = isPNGFilename(fullFilename.c_str()); + + // to be able to turn on/off srgb, need to set a view + id textureView; + MyMTLPixelFormat textureFormat = (MyMTLPixelFormat)image.pixelFormat; + + // TODO: may only want to offer on png files, where format is + MyMTLPixelFormat viewFormat = textureFormat; + if (isPNG) // && isSrgbFormat(textureFormat)) + viewFormat = toggleSrgbFormat(textureFormat); + if (viewFormat == textureFormat) { + viewFormat = MyMTLPixelFormatInvalid; + } + else { + // This may fail. 
+ textureView = [texture newTextureViewWithPixelFormat:(MTLPixelFormat)viewFormat]; + + textureView.label = [texture.label stringByAppendingString:@"View"]; + } + // hacking in the normal texture here, so can display them together during // preview id normalTexture; @@ -1143,12 +1198,22 @@ - (BOOL)loadTextureFromImage:(nonnull const char *)fullFilenameString } } + id diffTexture; + if (imageDiff) { + // Note: this name may not be the same name + diffTexture = [_loader loadTextureFromImage:*imageDiff + originalFormat:nil + name:filenameShort]; + if (!diffTexture) { + return NO; + } + } + // if archive contained png, then it's been converted to ktx // so the info below may not reflect original data // Would need original png data to look at header // This is only info on image, not on imageNormal - bool isPNG = isPNGFilename(fullFilename.c_str()); if (!isArchive && isPNG) { _showSettings->imageInfo = kramInfoToString(fullFilename, false); _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); @@ -1165,22 +1230,28 @@ - (BOOL)loadTextureFromImage:(nonnull const char *)fullFilenameString _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; + _showSettings->hasDiffTexture = diffTexture != nil; @autoreleasepool { _colorMap = texture; + _colorMapView = textureView; _normalMap = normalTexture; + _diffMap = diffTexture; + + self.hasToggleView = _colorMapView != nil; } - [self updateImageSettings:fullFilename - image:image]; + // this is the actual format, may have been decoded + MyMTLPixelFormat format = (MyMTLPixelFormat)_colorMap.pixelFormat; + _data->updateImageSettings(fullFilename, image, format); } - [self resetSomeImageSettings:isNewFile]; + [self resetSomeImageSettings:isTextureNew]; return YES; } -- (BOOL)loadTexture:(nonnull NSURL *)url +- (BOOL)loadTexture:(nonnull NSURL*)url { string fullFilename = url.path.UTF8String; @@ -1193,10 +1264,9 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // DONE: tie this to url and 
modstamp differences double timestamp = fileDate.timeIntervalSince1970; - bool isNewFile = (fullFilename != _showSettings->lastFilename); - bool isTextureChanged = - isNewFile || (timestamp != _showSettings->lastTimestamp); + bool isTextureNew = _showSettings->isFileNew(fullFilename.c_str()); + bool isTextureChanged = _showSettings->isFileChanged(fullFilename.c_str(), timestamp); // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -1210,7 +1280,7 @@ - (BOOL)loadTexture:(nonnull NSURL *)url } const char* filenameShort = toFilenameShort(fullFilename.c_str()); - + MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; id texture = [_loader loadTextureFromImage:image originalFormat:&originalFormatMTL @@ -1219,12 +1289,31 @@ - (BOOL)loadTexture:(nonnull NSURL *)url return NO; } + bool isPNG = isPNGFilename(fullFilename.c_str()); + + // to be able to turn on/off srgb, need to set a view + id textureView; + MyMTLPixelFormat textureFormat = (MyMTLPixelFormat)image.pixelFormat; + + // DONE: may only want to offer on png files, where format is + MyMTLPixelFormat viewFormat = textureFormat; + if (isPNG) // && isSrgbFormat(textureFormat)) + viewFormat = toggleSrgbFormat(textureFormat); + if (viewFormat == textureFormat) { + viewFormat = MyMTLPixelFormatInvalid; + } + else { + // This may fail. 
+ textureView = [texture newTextureViewWithPixelFormat:(MTLPixelFormat)viewFormat]; + + textureView.label = [texture.label stringByAppendingString:@"View"]; + } + // This doesn't look for or load corresponding normal map, but should // this is not the png data, but info on converted png to ktx level // But this avoids loading the image 2 more times // Size of png is very different than decompressed or recompressed ktx - bool isPNG = isPNGFilename(fullFilename.c_str()); if (isPNG) { _showSettings->imageInfo = kramInfoToString(fullFilename, false); _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); @@ -1242,222 +1331,47 @@ - (BOOL)loadTexture:(nonnull NSURL *)url _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; + // TODO: should archive work with diff? + id diffTexture = nil; + _showSettings->hasDiffTexture = diffTexture != nil; + @autoreleasepool { _colorMap = texture; + _colorMapView = textureView; _normalMap = nil; + _diffMap = nil; + + self.hasToggleView = _colorMapView != nil; } - [self updateImageSettings:fullFilename - image:image]; + MyMTLPixelFormat format = (MyMTLPixelFormat)_colorMap.pixelFormat; + _data->updateImageSettings(fullFilename, image, format); } - [self resetSomeImageSettings:isNewFile]; + [self resetSomeImageSettings:isTextureNew]; return YES; } -// only called on new or modstamp-changed image -- (void)updateImageSettings:(const string &)fullFilename - image:(KTXImage &)image -{ - _showSettings->isModel = false; - - // this is the actual format, may have been decoded - id texture = _colorMap; - MyMTLPixelFormat format = (MyMTLPixelFormat)texture.pixelFormat; - - // format may be trancoded to gpu-friendly format - MyMTLPixelFormat originalFormat = image.pixelFormat; - - _showSettings->blockX = image.blockDims().x; - _showSettings->blockY = image.blockDims().y; - - _showSettings->isSigned = isSignedFormat(format); - - TexContentType texContentType = 
findContentTypeFromFilename(fullFilename.c_str()); - _showSettings->texContentType = texContentType; - //_showSettings->isSDF = isSDF; - - // textures are already premul, so don't need to premul in shader - // should really have 3 modes, unmul, default, premul - bool isPNG = isPNGFilename(fullFilename.c_str()); - - _showSettings->isPremul = false; - if (texContentType == TexContentTypeAlbedo && isPNG) { - _showSettings->isPremul = - true; // convert to premul in shader, so can see other channels - } - - int32_t numChannels = numChannelsOfFormat(originalFormat); - _showSettings->numChannels = numChannels; - - // TODO: identify if texture holds normal data from the props - // have too many 4 channel normals that shouldn't swizzle like this - // kramTextures.py is using etc2rg on iOS for now, and not astc. - - _showSettings->isSwizzleAGToRG = false; - - // For best sdf and normal reconstruct from ASTC or BC3, must use RRR1 and - // GGGR or RRRG BC1nm multiply r*a in the shader, but just use BC5 anymore. - // if (isASTCFormat(originalFormat) && isNormal) { - // // channels after = "ag01" - // _showSettings->isSwizzleAGToRG = true; - // } - - // can derive these from texture queries - _showSettings->mipCount = (int32_t)image.mipLevels.size(); - _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || - image.textureType == MyMTLTextureTypeCubeArray) - ? 
6 - : 0; - _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; - _showSettings->sliceCount = (int32_t)image.depth; - - _showSettings->imageBoundsX = (int32_t)image.width; - _showSettings->imageBoundsY = (int32_t)image.height; -} - -float zoom3D = 1.0f; - - (void)resetSomeImageSettings:(BOOL)isNewFile { - // only reset these on new texture, but have to revalidate - if (isNewFile) { - // then can manipulate this after loading - _showSettings->mipNumber = 0; - _showSettings->faceNumber = 0; - _showSettings->arrayNumber = 0; - _showSettings->sliceNumber = 0; - - _showSettings->channels = TextureChannels::ModeRGBA; - - // wish could keep existing setting, but new texture might not - // be supported debugMode for new texture - _showSettings->debugMode = DebugMode::DebugModeNone; - - _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; - } - else { - // reloaded file may have different limits - _showSettings->mipNumber = - std::min(_showSettings->mipNumber, _showSettings->mipCount); - _showSettings->faceNumber = - std::min(_showSettings->faceNumber, _showSettings->faceCount); - _showSettings->arrayNumber = - std::min(_showSettings->arrayNumber, _showSettings->arrayCount); - _showSettings->sliceNumber = - std::min(_showSettings->sliceNumber, _showSettings->sliceCount); - } - - [self updateProjTransform]; + _data->resetSomeImageSettings(isNewFile); // the rect is ar:1 for images float aspectRatioXtoY = _showSettings->imageAspectRatio(); [self _createMeshRect:aspectRatioXtoY]; - - // this controls viewMatrix (global to all visible textures) - _showSettings->panX = 0.0f; - _showSettings->panY = 0.0f; - - _showSettings->zoom = _showSettings->zoomFit; - - // Y is always 1.0 on the plane, so scale to imageBoundsY - // plane is already a non-uniform size, so can keep uniform scale - - // have one of these for each texture added to the viewer - //float scaleX = MAX(1, _showSettings->imageBoundsX); - float scaleY = MAX(1, _showSettings->imageBoundsY); 
- float scaleX = scaleY; - float scaleZ = scaleY; - - _modelMatrix = - float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // uniform scale - _modelMatrix = _modelMatrix * - matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back - - // uniform scaled 3d primitive - float scale = scaleY; // MAX(scaleX, scaleY); - - // store the zoom into thew view matrix - // fragment tangents seem to break down at high model scale due to precision - // differences between worldPos and uv -// static bool useZoom3D = false; -// if (useZoom3D) { -// zoom3D = scale; // * _showSettings->viewSizeX / 2.0f; -// scale = 1.0; -// } - - _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale - _modelMatrix3D = - _modelMatrix3D * - matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back -} - -- (float4x4)computeImageTransform:(float)panX - panY:(float)panY - zoom:(float)zoom -{ - // translate - float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); - - // non-uniform scale is okay here, only affects ortho volume - // setting this to uniform zoom and object is not visible, zoom can be 20x in - // x and y - if (_showSettings->is3DView) { - zoom *= zoom3D; - } - - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); - viewMatrix = panTransform * viewMatrix; - - // scale - if (_showSettings->is3DView) { - return _projectionMatrix * viewMatrix * _modelMatrix3D; - } - else { - return _projectionMatrix * viewMatrix * _modelMatrix; - } -} - -inline bool almost_equal_elements(float3 v, float tol) -{ - return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); -} - -inline const float3x3& toFloat3x3(const float4x4 &m) { return (const float3x3 &)m; } - -float4 inverseScaleSquared(const float4x4 &m) -{ - float3 scaleSquared = float3m(length_squared(m.columns[0].xyz), - length_squared(m.columns[1].xyz), - length_squared(m.columns[2].xyz)); - - // if uniform, then set scaleSquared all to 1 - if (almost_equal_elements(scaleSquared, 1e-5f)) { - 
scaleSquared = float3m(1.0f); - } - - // don't divide by 0 - float3 invScaleSquared = - recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); - - // identify determinant here for flipping orientation - // all shapes with negative determinant need orientation flipped for - // backfacing and need to be grouned together if rendering with instancing - float det = determinant(toFloat3x3(m)); - - return float4m(invScaleSquared, det); } - (void)_updateGameState { /// Update any game state before encoding rendering commands to our drawable - Uniforms &uniforms = - *(Uniforms *)_dynamicUniformBuffer[_uniformBufferIndex].contents; + Uniforms& uniforms = + *(Uniforms*)_dynamicUniformBuffer[_uniformBufferIndex].contents; uniforms.isNormal = _showSettings->texContentType == TexContentTypeNormal; - uniforms.isPremul = _showSettings->isPremul; + uniforms.doShaderPremul = _showSettings->doShaderPremul; + uniforms.isSrgbInput = _showSettings->isSRGBShown && isSrgbFormat(_showSettings->originalFormat); uniforms.isSigned = _showSettings->isSigned; uniforms.isSwizzleAGToRG = _showSettings->isSwizzleAGToRG; @@ -1483,6 +1397,7 @@ - (void)_updateGameState uniforms.isWrap = doWrap ? 
_showSettings->isWrap : false; uniforms.isPreview = _showSettings->isPreview; + uniforms.isDiff = _showSettings->isDiff; uniforms.isNormalMapPreview = false; if (uniforms.isPreview) { @@ -1491,7 +1406,7 @@ - (void)_updateGameState if (_normalMap != nil) { uniforms.isNormalMapSigned = isSignedFormat((MyMTLPixelFormat)_normalMap.pixelFormat); - uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this + uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this } } @@ -1544,7 +1459,7 @@ - (void)_updateGameState case 3: _mesh = _meshSphereMirrored; break; - // case 3: _mesh = _meshCylinder; break; + // case 3: _mesh = _meshCylinder; break; case 4: _mesh = _meshCapsule; break; @@ -1558,64 +1473,36 @@ - (void)_updateGameState uniforms.isInsetByHalfPixel = true; } - // translate - float4x4 panTransform = - matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); + _data->updateTransforms(); // this is an animated effect, that overlays the shape uv wires over the image uniforms.isUVPreview = _showSettings->uvPreview > 0.0; uniforms.uvPreview = _showSettings->uvPreview; - - // scale - float zoom = _showSettings->zoom; - - if (_showSettings->is3DView) { - _viewMatrix3D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform - _viewMatrix3D = panTransform * _viewMatrix3D; - - // viewMatrix should typically be the inverse - //_viewMatrix = simd_inverse(_viewMatrix3D); - - float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix3D; - uniforms.projectionViewMatrix = projectionViewMatrix; - // works when only one texture, but switch to projectViewMatrix - uniforms.modelMatrix = _modelMatrix3D; - - uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix3D); - - _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; - - // cache the camera position - uniforms.cameraPosition = - inverse(_viewMatrix3D).columns[3].xyz; // this is all ortho + uniforms.uvToShapeRatio = 1.0f; + switch (_showSettings->meshNumber) 
{ + case 0: + if (_showSettings->imageBoundsY) + uniforms.uvToShapeRatio = _showSettings->imageBoundsX / (float)_showSettings->imageBoundsY; + break; + case 2: + uniforms.uvToShapeRatio = 2.0f; + break; + case 4: + uniforms.uvToShapeRatio = 2.0f * M_PI * 0.3333f; + break; } - else { - _viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); - _viewMatrix = panTransform * _viewMatrix; + uniforms.projectionViewMatrix = _data->_projectionViewMatrix; + uniforms.cameraPosition = _data->_cameraPosition; - // viewMatrix should typically be the inverse - //_viewMatrix = simd_inverse(_viewMatrix3D); - - float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix; - uniforms.projectionViewMatrix = projectionViewMatrix; - - // works when only one texture, but switch to projectViewMatrix - uniforms.modelMatrix = _modelMatrix; - - uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); - - _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; - - // cache the camera position - uniforms.cameraPosition = - inverse(_viewMatrix).columns[3].xyz; // this is all ortho - } + // This is per object + uniforms.modelMatrix = _data->_modelMatrix; + // uniforms.modelMatrixInvScale2 = _data->_modelMatrixInvScale2; //_rotation += .01; } -- (void)_setUniformsLevel:(UniformsLevel &)uniforms mipLOD:(int32_t)mipLOD +- (void)_setUniformsLevel:(UniformsLevel&)uniforms mipLOD:(int32_t)mipLOD { uniforms.mipLOD = mipLOD; @@ -1659,14 +1546,14 @@ - (void)_setUniformsLevel:(UniformsLevel &)uniforms mipLOD:(int32_t)mipLOD } } -- (void)drawInMTKView:(nonnull MTKView *)view +- (void)drawInMTKView:(nonnull MTKView*)view { @autoreleasepool { // Per frame updates here // update per frame state [self updateAnimationState:view]; - + // TODO: move this out, needs to get called off mouseMove, but don't want to // call drawMain [self drawSample]; @@ -1675,29 +1562,29 @@ - (void)drawInMTKView:(nonnull MTKView *)view Signpost postWait("waitOnSemaphore"); 
dispatch_semaphore_wait(_inFlightSemaphore, DISPATCH_TIME_FOREVER); postWait.stop(); - + _uniformBufferIndex = (_uniformBufferIndex + 1) % MaxBuffersInFlight; id commandBuffer = [_commandQueue commandBuffer]; commandBuffer.label = @"MyCommand"; __block dispatch_semaphore_t block_sema = _inFlightSemaphore; - - #if USE_GLTF - GLTFMTLRenderer* gltfRenderer = _gltfRenderer; - [commandBuffer addCompletedHandler:^(id /* buffer */) { - [gltfRenderer signalFrameCompletion]; - - // increment count - dispatch_semaphore_signal(block_sema); - }]; - - #else - [commandBuffer addCompletedHandler:^(id /* buffer */) { - // increment count - dispatch_semaphore_signal(block_sema); - }]; - #endif + +#if USE_GLTF + GLTFMTLRenderer* gltfRenderer = _gltfRenderer; + [commandBuffer addCompletedHandler:^(id /* buffer */) { + [gltfRenderer signalFrameCompletion]; + + // increment count + dispatch_semaphore_signal(block_sema); + }]; + +#else + [commandBuffer addCompletedHandler:^(id /* buffer */) { + // increment count + dispatch_semaphore_signal(block_sema); + }]; +#endif [self _updateGameState]; @@ -1719,50 +1606,85 @@ - (void)drawInMTKView:(nonnull MTKView *)view [self drawMain:commandBuffer view:view]; postDraw.stop(); - + // hold onto this for sampling from it via eyedropper id drawable = view.currentDrawable; _lastDrawableTexture = drawable.texture; // These are equivalent // [commandBuffer presentDrawable:view.currentDrawable]; + + typeof(self) __weak weakSelf = self; [commandBuffer addScheduledHandler:^(id cmdBuf) { + if (cmdBuf.error) return; Signpost postPresent("presentDrawble"); - [drawable present]; + [weakSelf _present:drawable]; + }]; + + // This only works if only using one commandBuffer + [commandBuffer addCompletedHandler:^(id cmdBuf) { + if (cmdBuf.error) return; + double gpuTime = cmdBuf.GPUEndTime - cmdBuf.GPUStartTime; + [weakSelf _updateFramePacing:gpuTime]; }]; [commandBuffer commit]; } } +- (void)_present:(id)drawable +{ + if (_useFramePacing) + [drawable 
presentAfterMinimumDuration:_avgGpuTime]; + else + [drawable present]; +} + +- (void)_updateFramePacing:(double)gpuTime +{ + if (_useFramePacing) { + _avgGpuTime = lerp(_avgGpuTime, gpuTime, 0.25); + } +} + +- (void)setFramePacingEnabled:(bool)enable +{ + if (_useFramePacing != enable) { + _useFramePacing = enable; + + // this will get adjusted by updateFramePacing + _avgGpuTime = 1.0 / 60.0; + } +} + #if USE_GLTF -static GLTFBoundingSphere GLTFBoundingSphereFromBox2(const GLTFBoundingBox b) { +static GLTFBoundingSphere GLTFBoundingSphereFromBox2(const GLTFBoundingBox b) +{ GLTFBoundingSphere s; float3 center = 0.5f * (b.minPoint + b.maxPoint); - float r = simd::distance(b.maxPoint, center); - + float r = distance(b.maxPoint, center); + s.center = center; s.radius = r; return s; } #endif - - (void)drawMain:(id)commandBuffer - view:(nonnull MTKView *)view + view:(nonnull MTKView*)view { // Delay getting the currentRenderPassDescriptor until absolutely needed. This // avoids // holding onto the drawable and blocking the display pipeline any longer // than necessary MTLRenderPassDescriptor* renderPassDescriptor = nil; - + // This retrieval can take 20ms+ when gpu is busy Signpost post("nextDrawable"); renderPassDescriptor = view.currentRenderPassDescriptor; post.stop(); - + if (renderPassDescriptor == nil) { return; } @@ -1771,8 +1693,7 @@ - (void)drawMain:(id)commandBuffer #if USE_GLTF && _asset == nil #endif - ) - { + ) { // this will clear target id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; @@ -1788,9 +1709,8 @@ - (void)drawMain:(id)commandBuffer #if USE_GLTF { mylock lock(gModelLock); - + if (_asset) { - // TODO: needs to be done in the render loop, since it must run compute // This runs compute to generate radiance/irradiance in mip levels // Also an equirect version for a 2d image @@ -1799,14 +1719,13 @@ - (void)drawMain:(id)commandBuffer [_gltfRenderer.lightingEnvironment generateFromCubeTexture:_environmentTexture 
commandBuffer:commandBuffer]; else [_gltfRenderer.lightingEnvironment generateFromEquirectTexture:_environmentTexture commandBuffer:commandBuffer]; - + _environmentNeedsUpdate = false; } } } #endif - // Final pass rendering code here id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; @@ -1824,18 +1743,18 @@ - (void)drawMain:(id)commandBuffer [renderEncoder setDepthStencilState:_depthStateFull]; bool drawShape = true; - - #if USE_GLTF + +#if USE_GLTF { mylock lock(gModelLock); if (_asset) { drawShape = false; - + // update animations if (self.playAnimations) { - _animationTime += 1.0/60.0; - + _animationTime += 1.0 / 60.0; + NSTimeInterval maxAnimDuration = 0; for (GLTFAnimation* animation in _asset.animations) { for (GLTFAnimationChannel* channel in animation.channels) { @@ -1844,51 +1763,58 @@ - (void)drawMain:(id)commandBuffer } } } - + NSTimeInterval animTime = fmod(_animationTime, maxAnimDuration); - + for (GLTFAnimation* animation in _asset.animations) { [animation runAtTime:animTime]; } } - + // regularization scales the model to 1 unit dimension, may animate out of this box // just a scale to diameter 1, and translate back from center and viewer z GLTFBoundingSphere bounds = GLTFBoundingSphereFromBox2(_asset.defaultScene.approximateBounds); float invScale = (bounds.radius > 0) ? 
(0.5 / (bounds.radius)) : 1.0; - float4x4 centerScale = float4x4(float4m(invScale,invScale,invScale,1)); + float4x4 centerScale = float4x4(float4m(invScale, invScale, invScale, 1)); + +#if USE_SIMDLIB + float4x4 centerTranslation = float4x4::identity(); +#else float4x4 centerTranslation = matrix_identity_float4x4; +#endif centerTranslation.columns[3] = vector4(-bounds.center, 1.0f); float4x4 regularizationMatrix = centerScale * centerTranslation; - + // incorporate the rotation now - Uniforms &uniforms = - *(Uniforms *)_dynamicUniformBuffer[_uniformBufferIndex].contents; - + Uniforms& uniforms = + *(Uniforms*)_dynamicUniformBuffer[_uniformBufferIndex].contents; + regularizationMatrix = regularizationMatrix * uniforms.modelMatrix; - + // TODO: be able to pass regularization to affect root of modelMatrix tree, // do not modify viewMatrix here since that messes with world space. - + // set the view and projection matrix - _gltfRenderer.viewMatrix = _viewMatrix * regularizationMatrix; - _gltfRenderer.projectionMatrix = _projectionMatrix; - - [renderEncoder pushDebugGroup:@"DrawModel"]; + float4x4 m = _data->_viewMatrix * regularizationMatrix; + + // TODO: offer conversions to simd/simd.h + _gltfRenderer.viewMatrix = reinterpret_cast(m); + _gltfRenderer.projectionMatrix = reinterpret_cast(_data->_projectionMatrix); + + RenderScope drawModelScope(renderEncoder, "DrawModel"); [_gltfRenderer renderScene:_asset.defaultScene commandBuffer:commandBuffer commandEncoder:renderEncoder]; - [renderEncoder popDebugGroup]; } } - #endif - +#endif + if (drawShape) { - [renderEncoder pushDebugGroup:@"DrawShape"]; + RenderScope drawShapeScope(renderEncoder, "DrawShape"); // set the mesh shape for (NSUInteger bufferIndex = 0; bufferIndex < _mesh.vertexBuffers.count; bufferIndex++) { - MTKMeshBuffer *vertexBuffer = _mesh.vertexBuffers[bufferIndex]; - if ((NSNull *)vertexBuffer != [NSNull null]) { + MTKMeshBuffer* vertexBuffer = _mesh.vertexBuffers[bufferIndex]; + if ((NSNull*)vertexBuffer 
!= [NSNull null]) { [renderEncoder setVertexBuffer:vertexBuffer.buffer offset:vertexBuffer.offset atIndex:bufferIndex]; @@ -1959,18 +1885,26 @@ - (void)drawMain:(id)commandBuffer offset:0 atIndex:BufferIndexUniforms]; + id tex = _colorMap; + if (self.isToggleView && _colorMap && _colorMapView) + tex = _colorMapView; + // set the texture up - [renderEncoder setFragmentTexture:_colorMap atIndex:TextureIndexColor]; + [renderEncoder setFragmentTexture:tex atIndex:TextureIndexColor]; // setup normal map if (_normalMap && _showSettings->isPreview) { [renderEncoder setFragmentTexture:_normalMap atIndex:TextureIndexNormal]; } + if (_diffMap && _showSettings->isDiff) { + [renderEncoder setFragmentTexture:_diffMap atIndex:TextureIndexDiff]; + } + UniformsLevel uniformsLevel; uniformsLevel.drawOffset = float2m(0.0f); uniformsLevel.passNumber = kPassDefault; - + if (_showSettings->isPreview) { // upload this on each face drawn, since want to be able to draw all // mips/levels at once @@ -2004,11 +1938,12 @@ - (void)drawMain:(id)commandBuffer // by the zoom int32_t gap = _showSettings - ->showAllPixelGap; // * _showSettings->viewContentScaleFactor; + ->showAllPixelGap; // * _showSettings->viewContentScaleFactor; for (int32_t mip = 0; mip < _showSettings->mipCount; ++mip) { // upload this on each face drawn, since want to be able to draw all // mips/levels at once + [self _setUniformsLevel:uniformsLevel mipLOD:mip]; if (mip == 0) { @@ -2023,6 +1958,8 @@ - (void)drawMain:(id)commandBuffer int32_t numLevels = _showSettings->totalChunks(); for (int32_t level = 0; level < numLevels; ++level) { + RenderScope drawLevelScope(renderEncoder, "DrawLevel"); + if (isCube) { uniformsLevel.face = level % 6; uniformsLevel.arrayOrSlice = level / 6; @@ -2067,6 +2004,59 @@ - (void)drawMain:(id)commandBuffer } } } + + for (int32_t mip = 0; mip < _showSettings->mipCount; ++mip) { + // upload this on each face drawn, since want to be able to draw all + // mips/levels at once + + [self 
_setUniformsLevel:uniformsLevel mipLOD:mip]; + + if (mip == 0) { + uniformsLevel.drawOffset.y = 0.0f; + } + else { + // all mips draw at top mip size currently + uniformsLevel.drawOffset.y -= h + gap; + } + + // this its ktxImage.totalChunks() + int32_t numLevels = _showSettings->totalChunks(); + + for (int32_t level = 0; level < numLevels; ++level) { + if (isCube) { + uniformsLevel.face = level % 6; + uniformsLevel.arrayOrSlice = level / 6; + } + else { + uniformsLevel.arrayOrSlice = level; + } + + // advance x across faces/slices/array elements, 1d array and 2d thin + // array are weird though. + if (level == 0) { + uniformsLevel.drawOffset.x = 0.0f; + } + else { + uniformsLevel.drawOffset.x += w + gap; + } + + [renderEncoder setVertexBytes:&uniformsLevel + length:sizeof(uniformsLevel) + atIndex:BufferIndexUniformsLevel]; + + // [renderEncoder setFragmentBytes:&uniformsLevel + // length:sizeof(uniformsLevel) + // atIndex:BufferIndexUniformsLevel]; + + // force lod, and don't mip + // [renderEncoder setFragmentSamplerState:sampler + // lodMinClamp:mip + // lodMaxClamp:mip + 1 + // atIndex:SamplerIndexColor]; + // + [self drawAtlas:renderEncoder]; + } + } } else { int32_t mip = _showSettings->mipNumber; @@ -2093,7 +2083,6 @@ - (void)drawMain:(id)commandBuffer // mips on on screen faces and arrays and slices go across in a row, and // mips are displayed down from each of those in a column - for (MTKSubmesh* submesh in _mesh.submeshes) { [renderEncoder drawIndexedPrimitives:submesh.primitiveType indexCount:submesh.indexCount @@ -2101,21 +2090,23 @@ - (void)drawMain:(id)commandBuffer indexBuffer:submesh.indexBuffer.buffer indexBufferOffset:submesh.indexBuffer.offset]; } - + // Draw uv wire overlay if (_showSettings->is3DView && _showSettings->uvPreview > 0.0) { // need to force color in shader or it's still sampling texture // also need to add z offset - + + RenderScope drawUVPreviewScope(renderEncoder, "DrawUVPreview"); + [renderEncoder 
setTriangleFillMode:MTLTriangleFillModeLines]; - + // only applies to tris, not points/lines, pushes depth away (towards 0), after clip // affects reads/tests and writes. Could also add in vertex shader. // depthBias * 2^(exp(max abs(z) in primitive) - r) + slopeScale * maxSlope - [renderEncoder setDepthBias:0.015 slopeScale:3.0 clamp: 0.02]; - + [renderEncoder setDepthBias:0.015 slopeScale:3.0 clamp:0.02]; + uniformsLevel.passNumber = kPassUVPreview; - + [renderEncoder setVertexBytes:&uniformsLevel length:sizeof(uniformsLevel) atIndex:BufferIndexUniformsLevel]; @@ -2131,27 +2122,104 @@ - (void)drawMain:(id)commandBuffer indexBuffer:submesh.indexBuffer.buffer indexBufferOffset:submesh.indexBuffer.offset]; } - + uniformsLevel.passNumber = kPassDefault; - + // restore state, even though this isn't a true state shadow [renderEncoder setDepthBias:0.0 slopeScale:0.0 clamp:0.0]; - [renderEncoder setTriangleFillMode:MTLTriangleFillModeFill]; - } + + [self drawAtlas:renderEncoder]; } } - - [renderEncoder popDebugGroup]; } [renderEncoder endEncoding]; - // TODO: run any post-processing on each texture visible as fsw + // TODO: run any post-processing on each texture visible as fsq // TODO: environment map preview should be done as fsq } +class RenderScope { +public: + RenderScope(id encoder_, const char* name) + : encoder(encoder_) + { + id enc = (id)encoder; + [enc pushDebugGroup:[NSString stringWithUTF8String:name]]; + } + + void close() + { + if (encoder) { + id enc = (id)encoder; + [enc popDebugGroup]; + encoder = nil; + } + } + + ~RenderScope() + { + close(); + } + +private: + id encoder; +}; + +- (void)drawAtlas:(nonnull id)renderEncoder +{ + // draw last since this changes pipeline state + if (_showSettings->is3DView && _showSettings->atlas.empty()) + return; + + //if (!_showSettings->drawAtlas) + // return; + + RenderScope drawAtlasScope(renderEncoder, "DrawAtlas"); + + [renderEncoder setTriangleFillMode:MTLTriangleFillModeLines]; + [renderEncoder setDepthBias:5.0 
slopeScale:0.0 clamp:0.0]; + [renderEncoder setCullMode:MTLCullModeNone]; + + [renderEncoder setRenderPipelineState:_pipelineStateDrawLines]; + + // TODO: draw line strip with prim reset + // need atlas data in push constants or in vb + + // TOOO: also need to hover name or show names on canvas + + // [renderEncoder setVertexBytes:&uniformsLevel + // length:sizeof(uniformsLevel) + // atIndex:BufferIndexUniformsLevel]; + + UniformsDebug uniformsDebug; + + for (const Atlas& atlas : _showSettings->atlas) { + // not accounting for slice + uniformsDebug.rect = float4m(atlas.x, atlas.y, atlas.w, atlas.h); + + [renderEncoder setVertexBytes:&uniformsDebug + length:sizeof(uniformsDebug) + atIndex:BufferIndexUniformsDebug]; + + // this will draw diagonal + for (MTKSubmesh* submesh in _mesh.submeshes) { + [renderEncoder drawIndexedPrimitives:submesh.primitiveType + indexCount:submesh.indexCount + indexType:submesh.indexType + indexBuffer:submesh.indexBuffer.buffer + indexBufferOffset:submesh.indexBuffer.offset]; + } + } + + // restore state, even though this isn't a true state shadow + [renderEncoder setCullMode:MTLCullModeBack]; + [renderEncoder setDepthBias:0.0 slopeScale:0.0 clamp:0.0]; + [renderEncoder setTriangleFillMode:MTLTriangleFillModeFill]; +} + // want to run samples independent of redrawing the main view - (void)drawSample { @@ -2238,23 +2306,33 @@ - (void)drawSample // copy from texture back to CPU, might be easier using MTLBuffer.contents MTLRegion region = { - {0, 0, 0}, // MTLOrigin - {1, 1, 1} // MTLSize + {0, 0, 0}, // MTLOrigin + {1, 1, 1} // MTLSize }; if (isDrawableBlit) { half4 data16f; [texture getBytes:&data16f bytesPerRow:8 fromRegion:region mipmapLevel:0]; - data = toFloat4(data16f); + + data = float4m(data16f); } else { [texture getBytes:&data bytesPerRow:16 fromRegion:region mipmapLevel:0]; } - // return the value at the sample - self->_showSettings->textureResult = data; - self->_showSettings->textureResultX = textureLookupX; - 
self->_showSettings->textureResultY = textureLookupY; + // Call this to update the hud text. + // This makes sure the frame delay is accounted for. + dispatch_async(dispatch_get_main_queue(), ^{ + // return the value at the sample + self->_showSettings->textureResult = data; + self->_showSettings->textureResultX = textureLookupX; + self->_showSettings->textureResultY = textureLookupY; + + [self->_delegateHud updateEyedropperText]; + }); + + // TODO: This completed handler runs long after the hud has updated + // so need to invalidate the hud. So the pixel location is out of date. // printf("Color %f %f %f %f\n", data.x, data.y, data.z, data.w); }]; @@ -2274,7 +2352,7 @@ - (void)drawSamples:(id)commandBuffer renderEncoder.label = @"SampleCompute"; - [renderEncoder pushDebugGroup:@"DrawShape"]; + RenderScope drawShapeScope(renderEncoder, "DrawShape"); UniformsCS uniforms; uniforms.uv.x = lookupX; @@ -2315,8 +2393,12 @@ - (void)drawSamples:(id)commandBuffer break; } + id tex = _colorMap; + if (self.isToggleView && _colorMap && _colorMapView) + tex = _colorMapView; + // input and output texture - [renderEncoder setTexture:_colorMap + [renderEncoder setTexture:tex atIndex:TextureIndexColor]; [renderEncoder setTexture:_sampleComputeTex atIndex:TextureIndexSamples]; @@ -2329,11 +2411,10 @@ - (void)drawSamples:(id)commandBuffer [renderEncoder dispatchThreads:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; - [renderEncoder popDebugGroup]; [renderEncoder endEncoding]; } -- (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size +- (void)mtkView:(nonnull MTKView*)view drawableSizeWillChange:(CGSize)size { // Don't crashing trying to readback from the cached drawable during a resize. 
_lastDrawableTexture = nil; @@ -2350,50 +2431,47 @@ - (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size _showSettings->viewContentScaleFactor = framebufferScale; - [self updateProjTransform]; - + _data->updateProjTransform(); + #if USE_GLTF _gltfRenderer.drawableSize = size; _gltfRenderer.colorPixelFormat = view.colorPixelFormat; _gltfRenderer.depthStencilPixelFormat = view.depthStencilPixelFormat; #endif - - [self updateProjTransform]; + + _data->updateProjTransform(); } #if USE_GLTF // @protocol GLTFAssetLoadingDelegate -- (void)assetWithURL:(NSURL *)assetURL requiresContentsOfURL:(NSURL *)url completionHandler:(void (^)(NSData *_Nullable, NSError *_Nullable))completionHandler +- (void)assetWithURL:(NSURL*)assetURL requiresContentsOfURL:(NSURL*)url completionHandler:(void (^)(NSData* _Nullable, NSError* _Nullable))completionHandler { // This can handle remote assets - NSURLSessionDataTask *task = [_urlSession dataTaskWithURL:url - completionHandler:^(NSData *data, NSURLResponse *response, NSError *error) - { - completionHandler(data, error); - }]; - + NSURLSessionDataTask* task = [_urlSession dataTaskWithURL:url + completionHandler:^(NSData* data, NSURLResponse* response, NSError* error) { + completionHandler(data, error); + }]; + [task resume]; } -- (void)assetWithURL:(NSURL *)assetURL didFinishLoading:(GLTFAsset *)asset +- (void)assetWithURL:(NSURL*)assetURL didFinishLoading:(GLTFAsset*)asset { mylock lock(gModelLock); - + _asset = asset; - + _animationTime = 0.0; - + string fullFilename = assetURL.path.UTF8String; [self updateModelSettings:fullFilename]; } -- (void)assetWithURL:(NSURL *)assetURL didFailToLoadWithError:(NSError *)error; +- (void)assetWithURL:(NSURL*)assetURL didFailToLoadWithError:(NSError*)error; { // TODO: display this error to the user - NSLog(@"Asset load failed with error: %@", error); + KLOGE("Renderer", "Asset load failed with error: %s", [[error localizedDescription] UTF8String]); } #endif - - @end diff --git 
a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 54ae5d75..74df253e 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -1,8 +1,180 @@ +// kram - Copyright 2020-2025 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + #include "KramViewerBase.h" +// compare perf of these readers +#define USE_SIMDJSON 1 +#if USE_SIMDJSON +#include "simdjson/simdjson.h" +#else +#include "json11/json11.h" +#endif + namespace kram { -using namespace simd; -using namespace NAMESPACE_STL; +using namespace SIMD_NAMESPACE; +using namespace STL_NAMESPACE; + +#define ArrayCount(x) (sizeof(x) / sizeof(x[0])) + +//#ifdef NDEBUG +//bool doPrintPanZoom = false; +//#else +//bool doPrintPanZoom = false; +//#endif + +// Writing out to rgba32 for sampling, but unorm formats like ASTC and RGBA8 +// are still off and need to use the following. +float toSnorm8(float c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } +float2 toSnorm8(float2 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } +float3 toSnorm8(float3 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } +float4 toSnorm8(float4 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } + +float4 toSnorm(float4 c) { return 2.0f * c - 1.0f; } + +inline float4 toPremul(const float4& c) +{ + // premul with a + float4 cpremul = c; + float a = c.a; + cpremul.w = 1.0f; + cpremul *= a; + return cpremul; +} + +inline bool almost_equal_elements(float3 v, float tol) +{ + return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); +} + +inline const float3x3& toFloat3x3(const float4x4& m) { return (const float3x3&)m; } + +float4 inverseScaleSquared(const float4x4& m) +{ + float3 scaleSquared = float3m(length_squared(m.columns[0].xyz), + length_squared(m.columns[1].xyz), + length_squared(m.columns[2].xyz)); + + // if uniform, then set scaleSquared all to 1 + if (almost_equal_elements(scaleSquared, 1e-5f)) { + 
scaleSquared = float3m(1.0f); + } + + // don't divide by 0 + float3 invScaleSquared = + recip(SIMD_NAMESPACE::max(float3m(0.0001 * 0.0001), scaleSquared)); + + // identify determinant here for flipping orientation + // all shapes with negative determinant need orientation flipped for + // backfacing and need to be grouned together if rendering with instancing + float det = determinant(toFloat3x3(m)); + + return float4m(invScaleSquared, det); +} + +static string filenameNoExtension(const char* filename) +{ + const char* dotPosStr = strrchr(filename, '.'); + if (dotPosStr == nullptr) + return filename; + auto dotPos = dotPosStr - filename; + + // now chop off the extension + string filenameNoExt = filename; + return filenameNoExt.substr(0, dotPos); +} + +static void findPossibleNormalMapFromAlbedoFilename(const char* filename, vector& normalFilenames) +{ + normalFilenames.clear(); + + string filenameShort = filename; + + const char* ext = strrchr(filename, '.'); + + const char* dotPosStr = strrchr(filenameShort.c_str(), '.'); + if (dotPosStr == nullptr) + return; + + auto dotPos = dotPosStr - filenameShort.c_str(); + + // now chop off the extension + filenameShort = filenameShort.substr(0, dotPos); + + const char* searches[] = {"-a", "-d", "_Color", "_baseColor"}; + + for (uint32_t i = 0; i < ArrayCount(searches); ++i) { + const char* search = searches[i]; + if (endsWith(filenameShort, search)) { + filenameShort = filenameShort.substr(0, filenameShort.length() - strlen(search)); + break; + } + } + + const char* suffixes[] = {"-n", "_normal", "_Normal"}; + + string normalFilename; + for (uint32_t i = 0; i < ArrayCount(suffixes); ++i) { + const char* suffix = suffixes[i]; + + // may need to try various names, and see if any exist + normalFilename = filenameShort; + normalFilename += suffix; + normalFilename += ext; + + normalFilenames.push_back(normalFilename); + } +} + +// this aliases the existing string, so can't chop extension +inline const char* 
toFilenameShort(const char* filename) +{ + const char* filenameShort = strrchr(filename, '/'); + if (filenameShort == nullptr) { + filenameShort = filename; + } + else { + filenameShort += 1; + } + return filenameShort; +} + +static const vector supportedModelExt = { +#if USE_GLTF + ".gltf", + ".glb", +#endif +#if USE_USD + ".gltf", + ".glb", +#endif +}; + +bool isSupportedModelFilename(const char* filename) +{ + for (const char* ext : supportedModelExt) { + if (endsWithExtension(filename, ext)) { + return true; + } + } + return false; +} +bool isSupportedArchiveFilename(const char* filename) +{ + return endsWithExtension(filename, ".zip"); +} + +bool isSupportedJsonFilename(const char* filename) +{ + return endsWith(filename, "-atlas.json"); +} + +bool isDirectory(const char* filename) +{ + FileHelper fileHelper; + return fileHelper.isDirectory(filename); +} int32_t ShowSettings::totalChunks() const { @@ -12,9 +184,14 @@ int32_t ShowSettings::totalChunks() const std::max(one, sliceCount); } -const char *ShowSettings::meshNumberName(uint32_t meshNumber_) const +File::File(const char* name_, int32_t urlIndex_) + : name(name_), urlIndex(urlIndex_), nameShort(toFilenameShort(name_)) +{ +} + +const char* ShowSettings::meshNumberName(uint32_t meshNumber_) const { - const char *text = ""; + const char* text = ""; switch (meshNumber_) { case 0: @@ -39,9 +216,9 @@ const char *ShowSettings::meshNumberName(uint32_t meshNumber_) const return text; } -const char *ShowSettings::meshNumberText() const +const char* ShowSettings::meshNumberText() const { - const char *text = ""; + const char* text = ""; switch (meshNumber) { case 0: @@ -66,9 +243,9 @@ const char *ShowSettings::meshNumberText() const return text; } -const char *ShowSettings::shapeChannelText() const +const char* ShowSettings::shapeChannelText() const { - const char *text = ""; + const char* text = ""; switch (shapeChannel) { case ShapeChannelNone: @@ -92,7 +269,7 @@ const char *ShowSettings::shapeChannelText() 
const case ShapeChannelFaceNormal: text = "Show Faces"; break; - // case ShapeChannelBumpNormal: text = "Show Bumps"; break; + // case ShapeChannelBumpNormal: text = "Show Bumps"; break; case ShapeChannelMipLevel: text = "Show Mip Levels"; break; @@ -103,9 +280,9 @@ const char *ShowSettings::shapeChannelText() const return text; } -const char *ShowSettings::debugModeText() const +const char* ShowSettings::debugModeText() const { - const char *text = ""; + const char* text = ""; switch (debugMode) { case DebugModeNone: @@ -141,9 +318,9 @@ const char *ShowSettings::debugModeText() const return text; } -const char *ShowSettings::lightingModeText() const +const char* ShowSettings::lightingModeText() const { - const char *text = ""; + const char* text = ""; switch (lightingMode) { case LightingModeDiffuse: @@ -257,7 +434,7 @@ void ShowSettings::advanceDebugMode(bool decrement) bool isNormal = texContentType == TexContentTypeNormal; bool isSDF = texContentType == TexContentTypeSDF; - + // for normals show directions if (debugMode == DebugModePosX && !(isNormal || isSDF)) { advanceDebugMode(decrement); @@ -283,27 +460,26 @@ void ShowSettings::updateUVPreviewState() if (uvPreview < 1.0) uvPreview += uvPreviewStep; } - else - { + else { if (uvPreview > 0.0) uvPreview -= uvPreviewStep; } - uvPreview = saturate(uvPreview); + uvPreview = std::clamp(uvPreview, 0.0f, 1.0f); } } else { // This hides the uvView even when switchig back to 3d shape //uvPreview = 0.0; } - + // stop the frame update if (uvPreview == 0.0f || uvPreview == 1.0f) { uvPreviewFrames = 0; } } -void printChannels(string &tmp, const string &label, float4 c, +void printChannels(string& tmp, const string& label, float4 c, int32_t numChannels, bool isFloat, bool isSigned) { if (isFloat || isSigned) { @@ -345,114 +521,2647 @@ void printChannels(string &tmp, const string &label, float4 c, } } -float4x4 matrix4x4_translation(float tx, float ty, float tz) +string ShowSettings::windowTitleString(const char* filename) 
const { - float4x4 m = {(float4){1, 0, 0, 0}, - (float4){0, 1, 0, 0}, - (float4){0, 0, 1, 0}, - (float4){tx, ty, tz, 1}}; - return m; + // set title to filename, chop this to just file+ext, not directory + const char* filenameShort = strrchr(filename, '/'); + if (filenameShort == nullptr) { + filenameShort = filename; + } + else { + filenameShort += 1; + } + + string title = "kramv - "; + + if (isModel) { + title += formatTypeName(originalFormat); + title += " - "; + title += filenameShort; + } + else { + // was using subtitle, but that's macOS 11.0 feature. + title += formatTypeName(originalFormat); + title += " - "; + + // identify what we think the content type is + const char* typeText = ""; + switch (texContentType) { + case TexContentTypeAlbedo: + typeText = "a"; + break; + case TexContentTypeNormal: + typeText = "n"; + break; + case TexContentTypeAO: + typeText = "ao"; + break; + case TexContentTypeMetallicRoughness: + typeText = "mr"; + break; + case TexContentTypeSDF: + typeText = "sdf"; + break; + case TexContentTypeHeight: + typeText = "h"; + break; + case TexContentTypeUnknown: + typeText = ""; + break; + } + title += typeText; + // add some info about the texture to avoid needing to go to info + // srgb src would be useful too. + if (texContentType == TexContentTypeAlbedo && isPremul) { + title += ",p"; + } + title += " - "; + title += filenameShort; + } + + return title; } -float4x4 matrix4x4_rotation(float radians, vector_float3 axis) +//-------------------------------- + +// Want to avoid Apple libs for things that have C++ equivalents. 
+ +Data::Data() { - axis = vector_normalize(axis); - float ct = cosf(radians); - float st = sinf(radians); - float ci = 1 - ct; - float x = axis.x, y = axis.y, z = axis.z; +#if USE_SIMDLIB && KRAM_DEBUG && 0 + vecf vfmt; - float4x4 m = { - (float4){ ct + x * x * ci, y * x * ci + z * st, z * x * ci - y * st, 0}, - (float4){ x * y * ci - z * st, ct + y * y * ci, z * y * ci + x * st, 0}, - (float4){ x * z * ci + y * st, y * z * ci - x * st, ct + z * z * ci, 0}, - (float4){ 0, 0, 0, 1} - }; - return m; + // want to see the simd config + KLOGI("SIMDK", "%s", vfmt.simd_configs().c_str()); + KLOGI("SIMDK", "%s", vfmt.simd_alignments().c_str()); +#endif + + _showSettings = new ShowSettings(); + + _textSlots.resize(kTextSlotCount); +} +Data::~Data() +{ + delete _showSettings; } -float4x4 perspective_rhs(float fovyRadians, float aspectXtoY, float nearZ, float farZ, bool isReverseZ) +void Data::clearAtlas() { - // form tangents - float tanY = tanf(fovyRadians * 0.5f); - float tanX = tanY * aspectXtoY; + _showSettings->atlas.clear(); + _showSettings->lastAtlas = nullptr; +} - // currently symmetric - // all postive values from center - float4 tangents = { tanY, tanY, tanX, tanX }; - tangents *= nearZ; - - float t = tangents.x; - float b = -tangents.y; - float r = tangents.z; - float l = -tangents.w; - - float dx = (r - l); - float dy = (t - b); - - float xs = 2.0f * nearZ / dx; - float ys = 2.0f * nearZ / dy; - - // 0.5x? 
- float xoff = (r + l) / dx; - float yoff = (t + b) / dy; - - float m22; - float m23; - - if (isReverseZ) { - // zs drops out since zs = inf / -inf = 1, 1-1 = 0 - // z' = near / -z - - m22 = 0; - m23 = nearZ; +#if USE_SIMDJSON + +bool Data::loadAtlasFile(const char* filename) +{ + using namespace simdjson; + + clearAtlas(); + + Timer timer; + + // can just mmap the json + MmapHelper mmap; + if (!mmap.open(filename)) { + KLOGE("kramv", "Failed to open %s", filename); + return false; } - else { - float zs = farZ / (nearZ - farZ); - m22 = zs; - m23 = zs * nearZ; + ondemand::parser parser; + + padded_string json((const char*)mmap.data(), mmap.dataLength()); + auto atlasProps = parser.iterate(json); + + // can we get at memory use numbers to do the parse? + KLOGI("kramv", "parsed %.0f KB of json in %.3fms", + (double)mmap.dataLength() / 1024.0, + timer.timeElapsedMillis()); + + // Can use hover or a show all on these entries and names. + // Draw names on screen using system text in the upper left corner if 1 + // if showing all, then show names across each mip level. May want to + // snap to pixels on each mip level so can see overlap. 
+ + { + std::vector values; + //string_view atlasName = atlasProps["name"].get_string().value_unsafe(); + + uint64_t width = atlasProps["width"].get_uint64().value_unsafe(); + uint64_t height = atlasProps["height"].get_uint64().value_unsafe(); + + uint64_t slice = atlasProps["slice"].get_uint64().value_unsafe(); + + float uPad = 0.0f; + float vPad = 0.0f; + + if (atlasProps["paduv"].get_array().error() != NO_SUCH_FIELD) { + values.clear(); + for (auto value : atlasProps["paduv"]) + values.push_back(value.get_double().value_unsafe()); + + uPad = values[0]; + vPad = values[1]; + } + else if (atlasProps["padpx"].get_array().error() != NO_SUCH_FIELD) { + values.clear(); + for (auto value : atlasProps["padpx"]) + values.push_back(value.get_double().value_unsafe()); + + uPad = values[0]; + vPad = values[1]; + + uPad /= width; + vPad /= height; + } + + for (auto regionProps : atlasProps["regions"]) { + string_view name = regionProps["name"].get_string().value_unsafe(); + + float x = 0.0f; + float y = 0.0f; + float w = 0.0f; + float h = 0.0f; + + if (regionProps["ruv"].get_array().error() != NO_SUCH_FIELD) { + values.clear(); + for (auto value : regionProps["ruv"]) + values.push_back(value.get_double().value_unsafe()); + + // Note: could convert pixel and mip0 size to uv. 
+ // normalized uv make these easier to draw across all mips + x = values[0]; + y = values[1]; + w = values[2]; + h = values[3]; + } + else if (regionProps["rpx"].get_array().error() != NO_SUCH_FIELD) { + values.clear(); + for (auto value : regionProps["rpx"]) + values.push_back(value.get_double().value_unsafe()); + + x = values[0]; + y = values[1]; + w = values[2]; + h = values[3]; + + // normalize to uv using the width/height + x /= width; + y /= height; + w /= width; + h /= height; + } + + const char* verticalProp = "f"; // regionProps["rot"]; + bool isVertical = verticalProp && verticalProp[0] == 't'; + + Atlas atlas = {(string)name, x, y, w, h, uPad, vPad, isVertical, (uint32_t)slice}; + _showSettings->atlas.emplace_back(std::move(atlas)); + } } - - float4x4 m = { - (float4){ xs, 0, 0, 0 }, - (float4){ 0, ys, 0, 0 }, - (float4){ xoff, yoff, m22, -1 }, - (float4){ 0, 0, m23, 0 } - }; - - return m; + + // TODO: also need to be able to bring in vector shapes + // maybe from svg or files written out from figma or photoshop. + // Can triangulate those, and use xatlas to pack those. + // Also xatlas can flatten out a 3d model into a chart. 
+ + return true; +} + +#else + +bool Data::loadAtlasFile(const char* filename) +{ + using namespace json11; + + clearAtlas(); + + // can just mmap the json + MmapHelper mmap; + if (!mmap.open(filename)) { + KLOGE("kramv", "Failed to open %s", filename); + return false; + } + + Timer timer; + JsonReader jsonReader; + const Json* root = jsonReader.read((const char*)mmap.data(), mmap.dataLength()); + string err = jsonReader.error(); + if (!root || !err.empty()) { + KLOGE("kramv", "Failed parsing %s: %s", filename, err.c_str()); + return false; + } + timer.stop(); + + KLOGI("kramv", "parsed %.0f KB of json using %.0f KB of memory in %.3fms", + (double)mmap.dataLength() / 1024.0, + (double)jsonReader.memoryUse() / 1024.0, + timer.timeElapsedMillis()); + + const Json& atlasProps = (*root)[(uint32_t)0]; + + // Can use hover or a show all on these entries and names. + // Draw names on screen using system text in the upper left corner if 1 + // if showing all, then show names across each mip level. May want to + // snap to pixels on each mip level so can see overlap. 
+ + { + std::vector values; + // string_view atlasName = atlasProps["name"].get_string().value_unsafe(); + + int width = atlasProps["width"].int_value(); + int height = atlasProps["height"].int_value(); + + int slice = atlasProps["slice"].int_value(); + + float uPad = 0.0f; + float vPad = 0.0f; + + if (atlasProps["paduv"].is_array()) { + values.clear(); + for (const auto& value : atlasProps["paduv"]) + values.push_back(value.number_value()); + + uPad = values[0]; + vPad = values[1]; + } + else if (atlasProps["padpx"].is_array()) { + values.clear(); + for (const auto& value : atlasProps["padpx"]) + values.push_back(value.number_value()); + + uPad = values[0]; + vPad = values[1]; + + uPad /= width; + vPad /= height; + } + + string decodedName; + for (auto regionProps : atlasProps["regions"]) { + const char* name = regionProps["name"].string_value(decodedName); + + float x = 0.0f; + float y = 0.0f; + float w = 0.0f; + float h = 0.0f; + + if (regionProps["ruv"].is_array()) { + values.clear(); + for (auto value : regionProps["ruv"]) + values.push_back(value.number_value()); + + // Note: could convert pixel and mip0 size to uv. + // normalized uv make these easier to draw across all mips + x = values[0]; + y = values[1]; + w = values[2]; + h = values[3]; + } + else if (regionProps["rpx"].is_array()) { + values.clear(); + for (auto value : regionProps["rpx"]) + values.push_back(value.number_value()); + + x = values[0]; + y = values[1]; + w = values[2]; + h = values[3]; + + // normalize to uv using the width/height + x /= width; + y /= height; + w /= width; + h /= height; + } + + const char* verticalProp = "f"; // regionProps["rot"]; + bool isVertical = verticalProp && verticalProp[0] == 't'; + + Atlas atlas = {name, x, y, w, h, uPad, vPad, isVertical, (uint32_t)slice}; + _showSettings->atlas.emplace_back(std::move(atlas)); + } + } + + // TODO: also need to be able to bring in vector shapes + // maybe from svg or files written out from figma or photoshop. 
+ // Can triangulate those, and use xatlas to pack those. + // Also xatlas can flatten out a 3d model into a chart. + + return true; } -float4x4 orthographic_rhs(float width, float height, float nearZ, float farZ, - bool isReverseZ) +#endif + +// opens archive +bool Data::openArchive(const char* zipFilename, int32_t urlIndex) { - // float aspectRatio = width / height; - float xs = 2.0f / width; - float ys = 2.0f / height; + // grow the array, ptrs so that existing mmaps aren't destroyed + if (urlIndex >= _containers.size()) { + _containers.resize(urlIndex + 1, nullptr); + } - float xoff = 0.0f; // -0.5f * width; - float yoff = 0.0f; // -0.5f * height; + if (_containers[urlIndex] == nullptr) + _containers[urlIndex] = new FileContainer; - float dz = -(farZ - nearZ); - float zs = 1.0f / dz; + FileContainer& container = *_containers[urlIndex]; + MmapHelper& zipMmap = container.zipMmap; + ZipHelper& zip = container.zip; - float m22 = zs; - float m23 = zs * nearZ; + // close any previous zip + zipMmap.close(); - // revZ, can't use infiniteZ with ortho view - if (isReverseZ) { - m22 = -m22; - m23 = 1.0f - m23; + // open the mmap again + if (!zipMmap.open(zipFilename)) { + return false; } + if (!zip.openForRead(zipMmap.data(), zipMmap.dataLength())) { + return false; + } + return true; +} - float4x4 m = { - (float4){xs, 0, 0, 0}, - (float4){0, ys, 0, 0}, - (float4){0, 0, m22, 0}, - (float4){xoff, yoff, m23, 1} +// lists archive into _files +bool Data::listFilesInArchive(int32_t urlIndex) +{ + FileContainer& container = *_containers[urlIndex]; + ZipHelper& zip = container.zip; + + // filter out unsupported extensions + vector extensions = { + ".ktx", ".ktx2", ".png", // textures + ".dds", ".DDS" // allow caps for dds +#if USE_GLTF + // TODO: can't support these until have a loader from memory block + // GLTFAsset requires a URL. 
+ //, ".glb", ".gltf" // models +#endif +#if USE_USD + , + ".usd", ".usda", ".usb" +#endif }; - return m; + + container.zip.filterExtensions(extensions); + + // don't switch to empty archive + if (zip.zipEntrys().empty()) { + return false; + } + + for (const auto& entry : zip.zipEntrys()) { + _files.emplace_back(File(entry.filename, urlIndex)); + } + + return true; +} + +// TODO: can simplify by storing counterpart id when file list is created +bool Data::hasCounterpart(bool increment) +{ + if (_files.size() <= 1) { + return false; + } + + const File& file = _files[_fileIndex]; + string currentFilename = filenameNoExtension(file.nameShort.c_str()); + + uint32_t nextFileIndex = _fileIndex; + + size_t numEntries = _files.size(); + if (increment) + nextFileIndex++; + else + nextFileIndex += numEntries - 1; // back 1 + + nextFileIndex = nextFileIndex % numEntries; + + const File& nextFile = _files[nextFileIndex]; + string nextFilename = filenameNoExtension(nextFile.nameShort.c_str()); + + // if short name matches (no ext) then it's a counterpart + if (currentFilename != nextFilename) + return false; + + return true; +} + +bool Data::advanceCounterpart(bool increment) +{ + if (_files.size() <= 1) { + return false; + } + + // see if file has counterparts + const File& file = _files[_fileIndex]; + string currentFilename = filenameNoExtension(file.nameShort.c_str()); + + // TODO: this should cycle through only the counterparts + uint32_t nextFileIndex = _fileIndex; + + size_t numEntries = _files.size(); + if (increment) + nextFileIndex++; + else + nextFileIndex += numEntries - 1; // back 1 + + nextFileIndex = nextFileIndex % numEntries; + + const File& nextFile = _files[nextFileIndex]; + string nextFilename = filenameNoExtension(nextFile.nameShort.c_str()); + + if (currentFilename != nextFilename) + return false; + + _fileIndex = nextFileIndex; + + return _delegate.loadFile(true); +} + +bool Data::advanceFile(bool increment) +{ + if (_files.empty()) { + return false; + } + 
+ size_t numEntries = _files.size(); + if (increment) + _fileIndex++; + else + _fileIndex += numEntries - 1; // back 1 + + _fileIndex = _fileIndex % numEntries; + + return _delegate.loadFile(true); +} + +bool Data::findFilename(const string& filename) +{ + bool isFound = false; + + // linear search + for (const auto& search : _files) { + if (search.name == filename) { + isFound = true; + break; + } + } + return isFound; +} + +bool Data::findFilenameShort(const string& filename) +{ + bool isFound = false; + + // linear search + for (const auto& search : _files) { + if (search.nameShort == filename) { + isFound = true; + break; + } + } + return isFound; +} + +const File* Data::findFileShort(const string& filename) +{ + // linear search + for (const auto& search : _files) { + if (search.nameShort == filename) { + return &search; + } + } + return nullptr; +} + +// rect here is expect xy, wh +bool isPtInRect(float2 pt, float4 r) +{ + return all((pt >= r.xy) & (pt <= r.xy + r.zw)); +} + +const Atlas* Data::findAtlasAtUV(float2 pt) +{ + if (_showSettings->atlas.empty()) return nullptr; + if (_showSettings->imageBoundsX == 0) return nullptr; + + const Atlas* atlas = nullptr; + + // Note: rects are in uv + + // This might need to become an atlas array index instead of ptr + const Atlas* lastAtlas = _showSettings->lastAtlas; + + if (lastAtlas) { + if (isPtInRect(pt, lastAtlas->rect())) { + atlas = lastAtlas; + } + } + + if (!atlas) { + // linear search + for (const auto& search : _showSettings->atlas) { + if (isPtInRect(pt, search.rect())) { + atlas = &search; + break; + } + } + + _showSettings->lastAtlas = atlas; + } + + return atlas; +} + +bool Data::isArchive() const +{ + //NSArray* urls_ = (NSArray*)_delegate._urls; + //NSURL* url = urls_[_files[_fileIndex].urlIndex]; + //const char* filename = url.fileSystemRepresentation; + + string filename = _urls[_files[_fileIndex].urlIndex]; + return isSupportedArchiveFilename(filename.c_str()); +} + +void 
Data::setPerfDirectory(const char* directory) +{ + Perf* perf = Perf::instance(); + perf->setPerfDirectory(directory); +} + +bool Data::loadFile() +{ + if (isArchive()) { + return loadFileFromArchive(); + } + + // now lookup the filename and data at that entry + const File& file = _files[_fileIndex]; + const char* filename = file.name.c_str(); + + string fullFilename = filename; + auto timestamp = FileHelper::modificationTimestamp(filename); + + bool isTextureChanged = _showSettings->isFileChanged(filename, timestamp); + if (!isTextureChanged) { + return true; + } + +#if USE_GLTF || USE_USD + bool isModel = isSupportedModelFilename(filename); + if (isModel) { + bool success = _delegate.loadModelFile(filename); + + if (success) { + // store the filename + _showSettings->lastFilename = filename; + _showSettings->lastTimestamp = timestamp; + } + + return success; + } +#endif + + // have already filtered filenames out, so this should never get hit + if (!isSupportedFilename(filename)) { + return false; + } + + // Note: better to extract from filename instead of root of folder dropped + // or just keep displaying full path of filename. + + _archiveName.clear(); + + vector possibleNormalFilenames; + string normalFilename; + bool hasNormal = false; + + TexContentType texContentType = findContentTypeFromFilename(filename); + if (texContentType == TexContentTypeAlbedo) { + findPossibleNormalMapFromAlbedoFilename(filename, possibleNormalFilenames); + + for (const auto& name : possibleNormalFilenames) { + hasNormal = findFilename(name); + + if (hasNormal) { + normalFilename = name; + break; + } + } + } + + // see if there is an atlas file too, and load the rectangles for preview + // note sidecar atlas files are a pain to view with a sandbox, may want to + // splice into ktx/ktx2 files, but no good metadata for png/dds. 
+ _showSettings->atlas.clear(); + + string atlasFilename = filenameNoExtension(filename); + bool hasAtlas = false; + + // replace -a, -d, with -atlas.jsonc + const char* dashPosStr = strrchr(atlasFilename.c_str(), '-'); + if (dashPosStr != nullptr) { + atlasFilename = atlasFilename.substr(0, dashPosStr - atlasFilename.c_str()); + } + atlasFilename += "-atlas.json"; + if (findFilename(atlasFilename.c_str())) { + if (loadAtlasFile(atlasFilename.c_str())) { + hasAtlas = true; + } + } + if (!hasAtlas) { + clearAtlas(); + atlasFilename.clear(); + } + + // If it's a compressed file, then set a diff target if a corresponding png + // is found. Eventually see if a src dds/ktx/ktx2 exists. Want to stop + // using png as source images. Note png don't have custom mips, unless + // flattened to one image. So have to fabricate mips here. KTXImage + // can already load up striped png into slices, etc. + + bool hasDiff = false; + string diffFilename; + + if (!isPNGFilename(filename)) { + diffFilename = filenameNoExtension(filename); + diffFilename += ".png"; + + diffFilename = toFilenameShort(diffFilename.c_str()); + if (diffFilename != filename) { + const File* diffFile = findFileShort(diffFilename.c_str()); + if (diffFile) { + diffFilename = diffFile->name; + hasDiff = true; + } + } + + if (!hasDiff) + diffFilename.clear(); + } + + //------------------------------- + + KTXImage image; + KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + + KTXImage imageDiff; + KTXImageData imageDiffDataKTX; + + // this requires decode and conversion to RGBA8u + if (!imageDataKTX.open(fullFilename.c_str(), image)) { + return false; + } + + // load up the diff, but would prefer to defer this + if (hasDiff && !imageDiffDataKTX.open(diffFilename.c_str(), imageDiff)) { + hasDiff = false; + + // TODO: could also compare dimensions to see if same + + if (imageDiff.textureType == image.textureType && + (imageDiff.textureType == MyMTLTextureType2D)) { + } + else { 
+ hasDiff = false; + } + } + + if (hasNormal && + imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + imageNormal.textureType == MyMTLTextureType2DArray)) { + // hasNormal = true; + } + else { + hasNormal = false; + } + } + + //--------------------------------- + + if (!_delegate.loadTextureFromImage(fullFilename.c_str(), (double)timestamp, + image, + hasNormal ? &imageNormal : nullptr, + hasDiff ? &imageDiff : nullptr, + false)) { + return false; + } + + // store the filename + _showSettings->lastFilename = filename; + _showSettings->lastTimestamp = timestamp; + + return true; +} + +bool Data::loadFileFromArchive() +{ + // now lookup the filename and data at that entry + const File& file = _files[_fileIndex]; + FileContainer& container = *_containers[file.urlIndex]; + ZipHelper& zip = container.zip; + + const char* filename = file.name.c_str(); + const auto* entry = zip.zipEntry(filename); + string fullFilename = entry->filename; + double timestamp = (double)entry->modificationDate; + + bool isTextureChanged = _showSettings->isFileChanged(filename, timestamp); + if (!isTextureChanged) { + return true; + } + + // TODO: don't have a version which loads gltf model from memory block + // bool isModel = isSupportedModelFilename(filename); + // if (isModel) + // return [self loadModelFile:filename]; + + //-------- + + if (!isSupportedFilename(filename)) { + return false; + } + + // TODO: right now -atlas.json even if already loaded loose + // won't apply to archive textures. Would the -atlas.json file + // need to be in the same archive? + bool hasAtlas = false; + if (!hasAtlas) { + clearAtlas(); + } + + KPERFT("loadFileFromArchive"); + + const uint8_t* imageData = nullptr; + uint64_t imageDataLength = 0; + + // DONE: logic is bust below. 
Can only use extractRaw + // if the file in the archive isn't compressed. Have Apple + // zip that compressed png files. So then the raw ptr/size + // needs deflated. + bool isFileUncompressed = entry->compressedSize == entry->uncompressedSize; + + vector bufferForImage; + + if (isFileUncompressed) { + KPERFT("ZipExtractRaw"); + + // search for main file - can be albedo or normal + if (!zip.extractRaw(filename, &imageData, imageDataLength)) { + return false; + } + } + else { + KPERFT("ZipExtract"); + + // need to decompress first + if (!zip.extract(filename, bufferForImage)) { + return false; + } + + imageData = bufferForImage.data(); + imageDataLength = bufferForImage.size(); + } + + vector bufferForNormal; + + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + string normalFilename; + bool hasNormal = false; + vector normalFilenames; + + TexContentType texContentType = findContentTypeFromFilename(filename); + if (texContentType == TexContentTypeAlbedo) { + findPossibleNormalMapFromAlbedoFilename(filename, normalFilenames); + + for (const auto& name : normalFilenames) { + const auto* normalEntry = zip.zipEntry(name.c_str()); + + hasNormal = normalEntry != nullptr; + if (hasNormal) { + normalFilename = name; + + bool isNormalUncompressed = normalEntry->compressedSize == normalEntry->uncompressedSize; + + if (isNormalUncompressed) { + KPERFT("ZipExtractRawNormal"); + + zip.extractRaw(name.c_str(), &imageNormalData, + imageNormalDataLength); + } + else { + KPERFT("ZipExtractNormal"); + + // need to decompress first + if (!zip.extract(filename, bufferForNormal)) { + return false; + } + + imageNormalData = bufferForNormal.data(); + imageNormalDataLength = bufferForNormal.size(); + } + break; + } + } + } + + //--------------------------- + + // files in archive are just offsets into the mmap + // That's why we can't just pass filenames to the renderer + KTXImage image; + KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData 
imageNormalDataKTX; + + // TODO: do imageDiff here? + + KPERFT_START(1, "KTXOpen"); + + if (!imageDataKTX.open(imageData, imageDataLength, image)) { + return false; + } + + KPERFT_STOP(1); + + if (hasNormal) { + KPERFT("KTXOpenNormal"); + + if (imageNormalDataKTX.open( + imageNormalData, imageNormalDataLength, imageNormal)) { + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + imageNormal.textureType == MyMTLTextureType2DArray)) { + // hasNormal = true; + } + else { + hasNormal = false; + } + } + } + + //--------------------------------- + + KPERFT_START(3, "KTXLoad"); + + if (!_delegate.loadTextureFromImage(fullFilename.c_str(), (double)timestamp, image, hasNormal ? &imageNormal : nullptr, nullptr, true)) { + return false; + } + + KPERFT_STOP(3); + + //--------------------------------- + + string archiveURL = _urls[file.urlIndex]; + _archiveName = toFilenameShort(archiveURL.c_str()); + + return true; +} + +void Data::loadFilesFromUrls(vector& urls, bool skipSubdirs) +{ + // Using a member for archives, so limited to one archive in a drop + // but that's probably okay for now. Add a separate array of open + // archives if want > 1. 
+ + // copy the existing files list + string existingFilename; + if (_fileIndex < (int32_t)_files.size()) + existingFilename = _files[_fileIndex].name; + + // Fill this out again + _files.clear(); + + // clear pointers + for (FileContainer* container : _containers) + delete container; + _containers.clear(); + + // this will flatten the list + int32_t urlIndex = 0; + + vector urlsExtracted; + + for (const auto& url : urls) { + // These will flatten out to a list of files + const char* filename = url.c_str(); + + if (isSupportedArchiveFilename(filename) && + openArchive(filename, urlIndex) && + listFilesInArchive(urlIndex)) { + urlsExtracted.push_back(filename); + urlIndex++; + } + else if (isDirectory(filename)) { + // this first loads only models, then textures if only those + listFilesInFolder(url, urlIndex, skipSubdirs); + + // could skip if nothing added + urlsExtracted.push_back(url); + urlIndex++; + + // handle archives within folder + vector archiveFiles; + listArchivesInFolder(url, archiveFiles, skipSubdirs); + + for (const File& archiveFile : archiveFiles) { + const char* archiveFilename = archiveFile.name.c_str(); + if (openArchive(archiveFilename, urlIndex) && + listFilesInArchive(urlIndex)) { + //NSURL* urlArchive = [NSURL fileURLWithPath:[NSString stringWithUTF8String:archiveFilename]]; + //[urlsExtracted addObject:urlArchive]; + urlsExtracted.push_back(archiveFilename); + urlIndex++; + } + } + } + else if (isSupportedFilename(filename) +#if USE_GLTF + || isSupportedModelFilename(filename) +#endif + ) { + _files.emplace_back(File(filename, urlIndex)); + + //[urlsExtracted addObject:url]; + urlsExtracted.push_back(filename); + urlIndex++; + } + else if (isSupportedJsonFilename(filename)) { + _files.emplace_back(File(filename, urlIndex)); + + //[urlsExtracted addObject:url]; + urlsExtracted.push_back(filename); + urlIndex++; + } + } + + // sort them by short filename +#if USE_EASTL + STL_NAMESPACE::quick_sort(_files.begin(), _files.end()); +#else + 
STL_NAMESPACE::sort(_files.begin(), _files.end()); +#endif + + // preserve filename before load, and restore that index, by finding + // that name in refreshed folder list + _fileIndex = 0; + if (!existingFilename.empty()) { + for (uint32_t i = 0; i < _files.size(); ++i) { + if (_files[i].name == existingFilename) { + _fileIndex = i; + break; + } + } + } + + // preserve old file selection + _urls = urlsExtracted; +} + +void Data::showEyedropperData(const float2& uv) +{ + string text; + string tmp; + + float4 c = _showSettings->textureResult; + int32_t x = _showSettings->textureResultX; + int32_t y = _showSettings->textureResultY; + + // DONE: use these to format the text + MyMTLPixelFormat format = _showSettings->originalFormat; + bool isSrgb = isSrgbFormat(format); + bool isSigned = isSignedFormat(format); + + bool isHdr = isHdrFormat(format); + bool isFloat = isHdr; + + int32_t numChannels = _showSettings->numChannels; + + bool isNormal = _showSettings->texContentType == TexContentTypeNormal; + bool isColor = !isNormal; + + bool isDirection = false; + bool isValue = false; + + if (_showSettings->isEyedropperFromDrawable()) { + // TODO: could write barycentric, then lookup uv from that + // then could show the block info. 
+ + // interpret based on shapeChannel, debugMode, etc + switch (_showSettings->shapeChannel) { + case ShapeChannelDepth: + isSigned = false; // using fract on uv + + isValue = true; + isFloat = true; + numChannels = 1; + break; + case ShapeChannelUV0: + isSigned = false; // using fract on uv + + isValue = true; + isFloat = true; + numChannels = 2; // TODO: fix for 3d uvw + break; + + case ShapeChannelFaceNormal: + case ShapeChannelNormal: + case ShapeChannelTangent: + case ShapeChannelBitangent: + isDirection = true; + numChannels = 3; + + // convert unorm to snnorm + c = toSnorm(c); + break; + + case ShapeChannelMipLevel: + isValue = true; + isSigned = false; + isFloat = true; + + // viz is mipNumber as alpha + numChannels = 1; + c.r = 4.0 - (c.a * 4.0); + break; + + default: + break; + } + + // TODO: indicate px, mip, etc (f.e. showAll) + + // debug mode + + // preview vs. not + } + else { + // this will be out of sync with gpu eval, so may want to only display px + // from returned lookup this will always be a linear color + + // show uv, so can relate to gpu coordinates stored in geometry and find + // atlas areas + append_sprintf(text, "uv:%0.3f %0.3f\n", + (float)x / _showSettings->imageBoundsX, + (float)y / _showSettings->imageBoundsY); + + // pixel at top-level mip + append_sprintf(text, "px:%d %d\n", x, y); + + // show block num + int mipLOD = _showSettings->mipNumber; + + int mipX = _showSettings->imageBoundsX; + int mipY = _showSettings->imageBoundsY; + + mipX = mipX >> mipLOD; + mipY = mipY >> mipLOD; + + mipX = std::max(1, mipX); + mipY = std::max(1, mipY); + + mipX = (int32_t)(uv.x * mipX); + mipY = (int32_t)(uv.y * mipY); + + // TODO: may want to return mip in pixel readback + // don't have it right now, so don't display if preview is enabled + if (_showSettings->isPreview) + mipLOD = 0; + + auto blockDims = blockDimsOfFormat(format); + if (blockDims.x > 1) + append_sprintf(text, "bpx: %d %d\n", mipX / blockDims.x, + mipY / blockDims.y); + + // 
TODO: on astc if we have original blocks can run analysis from + // astc-encoder about each block. + + // show the mip pixel (only if not preview and mip changed) + if (mipLOD > 0 && !_showSettings->isPreview) + append_sprintf(text, "mpx: %d %d\n", mipX, mipY); + + // TODO: more criteria here, can have 2 channel PBR metal-roughness + // also have 4 channel normals where zw store other data. + + bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); + if (isSigned && !isDecodeSigned) { + c = toSnorm8(c); + } + } + + if (isValue) { + printChannels(tmp, "val: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isDirection) { + // print direction + isFloat = true; + isSigned = true; + + printChannels(tmp, "dir: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isNormal) { + float nx = c.x; + float ny = c.y; + + // unorm -> snorm + if (!isSigned) { + nx = toSnorm8(nx); + ny = toSnorm8(ny); + } + + // Note: not clamping nx,ny to < 1 like in shader + + // this is always postive on tan-space normals + // assuming we're not viewing world normals + const float maxLen2 = 0.999 * 0.999; + float len2 = nx * nx + ny * ny; + if (len2 > maxLen2) + len2 = maxLen2; + + float nz = sqrt(1.0f - len2); + + // print the underlying color (some nmaps are xy in 4 channels) + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + // print direction + float4 d = float4m(nx, ny, nz, 0.0f); + isFloat = true; + isSigned = true; + printChannels(tmp, "dir: ", d, 3, isFloat, isSigned); + text += tmp; + } + else if (isColor) { + // DONE: write some print helpers based on float4 and length + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "srg: ", s, numChannels, isFloat, isSigned); + text += tmp; + } + + // display the premul values too, but not fully transparent 
pixels + if (c.a > 0.0 && c.a < 1.0f) { + printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); + text += tmp; + + // TODO: do we need the premul srgb color too? + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, + isSigned); + text += tmp; + } + } + } + + setEyedropperText(text.c_str()); + + // TODO: range display of pixels is useful, only showing pixels that fall + // within a given range, but would need slider then, and determine range of + // pixels. + // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). + + // TODO: display histogram from compute, bin into buffer counts of pixels + + // DONE: stop clobbering hud text, need another set of labels + // and a zoom preview of the pixels under the cursor. + // Otherwise, can't really see the underlying color. + + // TODO: Stuff these on clipboard with a click, or use cmd+C? +} + +void Data::setEyedropperText(const char* text) +{ + setTextSlot(kTextSlotEyedropper, text); +} + +void Data::setAtlasText(const char* text) +{ + setTextSlot(kTextSlotAtlas, text); +} + +string Data::textFromSlots(bool isFileListHidden) const +{ + // combine textSlots + string text = _textSlots[kTextSlotHud]; + if (!text.empty() && text.back() != '\n') + text += "\n"; + + // don't show eyedropper text with table up, it's many lines and overlaps + if (!isFileListHidden) { + text += _textSlots[kTextSlotEyedropper]; + if (!text.empty() && text.back() != '\n') + text += "\n"; + + text += _textSlots[kTextSlotAtlas]; + } + + return text; +} + +void Data::setTextSlot(TextSlot slot, const char* text) +{ + _textSlots[slot] = text; +} + +void Data::updateUIAfterLoad() +{ + // TODO: move these to actions, and test their state instead of looking up + // buttons here and in HandleKey. 
+ + // base on showSettings, hide some fo the buttons + bool isShowAllHidden = + _showSettings->totalChunks() <= 1 && _showSettings->mipCount <= 1; + + bool isArrayHidden = _showSettings->arrayCount <= 1; + bool isFaceSliceHidden = + _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; + bool isMipHidden = _showSettings->mipCount <= 1; + + bool isJumpToNextHidden = _files.size() <= 1; + + bool isJumpToCounterpartHidden = true; + bool isJumpToPrevCounterpartHidden = true; + + if (_files.size() > 1) { + isJumpToCounterpartHidden = !hasCounterpart(true); + isJumpToPrevCounterpartHidden = !hasCounterpart(false); + } + + bool isRedHidden = _showSettings->numChannels == 0; // models don't show rgba + bool isGreenHidden = _showSettings->numChannels <= 1; + bool isBlueHidden = _showSettings->numChannels <= 2 && + _showSettings->texContentType != TexContentTypeNormal; // reconstruct z = b on normals + + // TODO: also need a hasAlpha for pixels, since many compressed formats like + // ASTC always have 4 channels but internally store R,RG01,... etc. Can get + // more data from swizzle in the props. Often alpha doesn't store anything + // useful to view. 
+ + // DONE: may want to disable isPremul on block textures that already have + // premul in data or else premul is applied a second time to the visual + + bool hasAlpha = _showSettings->numChannels >= 3; + + bool isAlphaHidden = !hasAlpha; + bool isPremulHidden = !hasAlpha; + bool isCheckerboardHidden = !hasAlpha; + + bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); + bool isPlayHidden = !_showSettings->isModel; // only for models + + bool isDiffHidden = false; // only for images + if (!_showSettings->isModel && _showSettings->hasDiffTexture) { + isDiffHidden = false; + } + _actionPlay->setHidden(isPlayHidden); + _actionArray->setHidden(isArrayHidden); + _actionFace->setHidden(isFaceSliceHidden); + _actionMip->setHidden(isMipHidden); + _actionShowAll->setHidden(isShowAllHidden); + + _actionDiff->setHidden(isDiffHidden); + _actionItem->setHidden(isJumpToNextHidden); + _actionPrevItem->setHidden(isJumpToNextHidden); + + _actionCounterpart->setHidden(isJumpToCounterpartHidden); + _actionPrevCounterpart->setHidden(isJumpToPrevCounterpartHidden); + + _actionR->setHidden(isRedHidden); + _actionG->setHidden(isGreenHidden); + _actionB->setHidden(isBlueHidden); + _actionA->setHidden(isAlphaHidden); + + _actionPremul->setHidden(isPremulHidden); + _actionSigned->setHidden(isSignedHidden); + _actionChecker->setHidden(isCheckerboardHidden); + + // only allow srgb to be disabled, not toggle on if off at load + MyMTLPixelFormat format = _showSettings->originalFormat; + bool isSrgb = isSrgbFormat(format); + _actionSrgb->setHidden(!isSrgb); + + // also need to call after each toggle + updateUIControlState(); +} + +void Data::updateUIControlState() +{ + // there is also mixed state, but not using that + auto On = true; + auto Off = false; + +#define toState(x) (x) ? 
On : Off + + auto showAllState = toState(_showSettings->isShowingAllLevelsAndMips); + auto premulState = toState(_showSettings->doShaderPremul); + auto signedState = toState(_showSettings->isSigned); + auto checkerboardState = toState(_showSettings->isCheckerboardShown); + auto previewState = toState(_showSettings->isPreview); + auto gridState = toState(_showSettings->isAnyGridShown()); + auto wrapState = toState(_showSettings->isWrap); + auto debugState = toState(_showSettings->debugMode != DebugModeNone); + auto hudState = toState(_showSettings->isHudShown); + + TextureChannels& channels = _showSettings->channels; + + auto redState = toState(channels == TextureChannels::ModeR001); + auto greenState = toState(channels == TextureChannels::Mode0G01); + auto blueState = toState(channels == TextureChannels::Mode00B1); + auto alphaState = toState(channels == TextureChannels::ModeAAA1); + + auto arrayState = toState(_showSettings->arrayNumber > 0); + auto faceState = toState(_showSettings->faceNumber > 0); + auto mipState = toState(_showSettings->mipNumber > 0); + + auto meshState = toState(_showSettings->meshNumber > 0); + auto meshChannelState = toState(_showSettings->shapeChannel > 0); + auto lightingState = + toState(_showSettings->lightingMode != LightingModeNone); + auto tangentState = toState(_showSettings->useTangent); + + // TODO: shadow the state on these, so don't have to to go ObjC + //Renderer* renderer = (Renderer*)self.delegate; + auto playState = toState(_showSettings->isModel && _showSettings->isPlayAnimations); + auto verticalState = toState(_showSettings->isVerticalUI); + auto uiState = toState(_showSettings->isHideUI); + auto diffState = toState(_showSettings->isDiff && _showSettings->hasDiffTexture); + + auto srgbState = toState(_showSettings->isSRGBShown); + auto perfState = toState(_showSettings->isPerf); + + _actionVertical->setHighlight(verticalState); + + // TODO: pass boolean, and change in the call + _actionPlay->setHighlight(playState); + 
_actionHelp->setHighlight(Off); + _actionInfo->setHighlight(Off); + _actionHud->setHighlight(hudState); + + _actionArray->setHighlight(arrayState); + _actionFace->setHighlight(faceState); + _actionMip->setHighlight(mipState); + + // these never show check state + _actionItem->setHighlight(Off); + _actionPrevItem->setHighlight(Off); + + _actionCounterpart->setHighlight(Off); + _actionPrevCounterpart->setHighlight(Off); + + _actionHideUI->setHighlight(uiState); // note below button always off, menu has state + + _actionR->setHighlight(redState); + _actionG->setHighlight(greenState); + _actionB->setHighlight(blueState); + _actionA->setHighlight(alphaState); + + _actionShowAll->setHighlight(showAllState); + _actionPreview->setHighlight(previewState); + _actionDiff->setHighlight(diffState); + _actionShapeMesh->setHighlight(meshState); + _actionShapeChannel->setHighlight(meshChannelState); + _actionLighting->setHighlight(lightingState); + _actionWrap->setHighlight(wrapState); + _actionGrid->setHighlight(gridState); + _actionDebug->setHighlight(debugState); + _actionTangent->setHighlight(tangentState); + + _actionPremul->setHighlight(premulState); + _actionSigned->setHighlight(signedState); + _actionChecker->setHighlight(checkerboardState); + + _actionSrgb->setHighlight(srgbState); + _actionPerf->setHighlight(perfState); +} + +// TODO: convert to C++ actions, and then call into Base holding all this +// move pan/zoom logic too. Then use that as start of Win32 kramv. 
+ +const Action* Data::actionFromMenu(kram_id menuItem) const +{ + const Action* action = nullptr; + + for (const auto& search : _actions) { + if (search.menuItem == menuItem) { + action = &search; + break; + } + } + + return action; +} + +const Action* Data::actionFromButton(kram_id button) const +{ + const Action* action = nullptr; + + for (const auto& search : _actions) { + if (search.button == button) { + action = &search; + break; + } + } + + return action; +} + +const Action* Data::actionFromKey(uint32_t keyCode) const +{ + const Action* action = nullptr; + + for (const auto& search : _actions) { + if (search.keyCode == keyCode) { + action = &search; + break; + } + } + + return action; +} + +void Data::setLoadedText(string& text) +{ + text = "Loaded "; + + string filename = _showSettings->lastFilename; + text += toFilenameShort(filename.c_str()); + + // archives and file systems have folders, split that off + string folderName; + const char* slashPos = strrchr(filename.c_str(), '/'); + if (slashPos != nullptr) { + folderName = filename.substr(0, slashPos - filename.c_str()); + } + + if (!folderName.empty()) { + text += " in folder "; + text += folderName; + } + + if (!_archiveName.empty()) { + text += " from archive "; + text += _archiveName; + } +} + +void Data::setFailedText(const string& filename, string& text) +{ + text = "Failed "; + + // This doesn't advance with failure + //string filename = _showSettings->lastFilename; + + text += toFilenameShort(filename.c_str()); + + // archives and file systems have folders, split that off + string folderName; + const char* slashPos = strrchr(filename.c_str(), '/'); + if (slashPos != nullptr) { + folderName = filename.substr(0, slashPos - filename.c_str()); + } + + if (!folderName.empty()) { + text += " in folder "; + text += folderName; + } + + if (!_archiveName.empty()) { + text += " from archive "; + text += _archiveName; + } +} + +void Data::initActions() +{ + // Don't reorder without also matching actionPtrs 
below + Action actions[] = { + Action("?", "Help", Key::Slash), + Action("I", "Info", Key::I), + Action("H", "Hud", Key::H), + Action("U", "UI", Key::U), + Action("V", "UI Vertical", Key::V), + + Action("Q", "Quick Diff", Key::Q), // C/D already taken + Action("D", "Debug", Key::D), + Action("G", "Grid", Key::G), + Action("B", "Checkerboard", Key::B), + + Action("", "", Key::A), // sep + + Action("P", "Preview", Key::P), + Action("W", "Wrap", Key::W), + Action("8", "Premul", Key::Num8), + Action("7", "Signed", Key::Num7), + + Action("", "", Key::A), // sep + + Action("A", "Show All", Key::A), + Action("M", "Mip", Key::M), + Action("F", "Face", Key::F), + Action("Y", "Array", Key::Y), + Action("9", "Srgb", Key::Num9), + Action("5", "Perf", Key::Num5), // really a debug action + + Action("↑", "Prev Item", Key::UpArrow), + Action("↓", "Next Item", Key::DownArrow), + Action("←", "Prev Counterpart", Key::LeftArrow), + Action("→", "Next Counterpart", Key::RightArrow), + + Action("R", "Reload", Key::R), + Action("0", "Fit", Key::Num0), + + Action("", "", Key::A), // sep + + Action(" ", "Play", Key::Space), + Action("6", "Shape UVPreview", Key::Num6), + Action("S", "Shape", Key::S), + Action("C", "Shape Channel", Key::C), + Action("L", "Lighting", Key::L), + Action("T", "Tangents", Key::T), + + Action("", "", Key::A), // sep + + // make these individual toggles and exclusive toggle off shift + Action("1", "Red", Key::Num1), + Action("2", "Green", Key::Num2), + Action("3", "Blue", Key::Num3), + Action("4", "Alpha", Key::Num4), + }; + + // These have to be in same order as above. May want to go back to search for text above. 
+ Action** actionPtrs[] = { + &_actionHelp, + &_actionInfo, + &_actionHud, + &_actionHideUI, + &_actionVertical, + + &_actionDiff, + &_actionDebug, + &_actionGrid, + &_actionChecker, + + &_actionPreview, + &_actionWrap, + &_actionPremul, + &_actionSigned, + + &_actionShowAll, + &_actionMip, + &_actionFace, + &_actionArray, + &_actionSrgb, + &_actionPerf, + + &_actionPrevItem, + &_actionItem, + &_actionPrevCounterpart, + &_actionCounterpart, + + &_actionReload, + &_actionFit, + + &_actionPlay, + &_actionShapeUVPreview, + &_actionShapeMesh, + &_actionShapeChannel, + &_actionLighting, + &_actionTangent, + + &_actionR, + &_actionG, + &_actionB, + &_actionA, + }; + + uint32_t numActions = ArrayCount(actions); + + // copy all of them to a vector, and then assign the action ptrs + for (int32_t i = 0; i < numActions; ++i) { + Action& action = actions[i]; + _actions.push_back(action); + } + + // now alias Actions to the vector above + uint32_t counter = 0; + for (int32_t i = 0; i < _actions.size(); ++i) { + // skip separators + Action& action = _actions[i]; + const char* icon = action.icon; // single char + bool isSeparator = icon[0] == 0; + if (isSeparator) continue; + + *(actionPtrs[counter++]) = &_actions[i]; + } + KASSERT(counter == ArrayCount(actionPtrs)); +} + +void Data::initDisabledButtons() +{ + // don't want these buttons showing up, menu only + _actionPrevItem->disableButton(); + _actionItem->disableButton(); + _actionPrevCounterpart->disableButton(); + _actionCounterpart->disableButton(); + + _actionHud->disableButton(); + _actionHelp->disableButton(); + _actionHideUI->disableButton(); + _actionVertical->disableButton(); +} + +void Data::updateEyedropper() +{ + if ((!_showSettings->isHudShown)) { + return; + } + + if (_showSettings->imageBoundsX == 0) { + // TODO: this return will leave old hud text up + return; + } + + // getting a lot of repeat cursor locations + // could have panning underneath cursor to deal with + if (_showSettings->lastCursorX == 
_showSettings->cursorX && + _showSettings->lastCursorY == _showSettings->cursorY) { + return; + } + + if (_showSettings->isEyedropperFromDrawable()) { + _showSettings->lastCursorX = _showSettings->cursorX; + _showSettings->lastCursorY = _showSettings->cursorY; + + // This just samples from drawable, so no re-render is needed + // showEyedropperData(float2m(0, 0)); + return; + } + + // don't wait on renderer to update this matrix + float4x4 projectionViewModelMatrix = + computeImageTransform(_showSettings->panX, + _showSettings->panY, + _showSettings->zoom); + + // convert to clip space, or else need to apply additional viewport transform + float halfX = _showSettings->viewSizeX * 0.5f; + float halfY = _showSettings->viewSizeY * 0.5f; + + // sometimes get viewSizeX that's scaled by retina, and other times not. + // account for contentScaleFactor (viewSizeX is 2x bigger than cursorX on + // retina display) now passing down drawableSize instead of view.bounds.size + halfX /= (float)_showSettings->viewContentScaleFactor; + halfY /= (float)_showSettings->viewContentScaleFactor; + + float4 cursor = float4m(_showSettings->cursorX, _showSettings->cursorY, 0.0f, 1.0f); + + float4x4 pixelToClipTfm = + { + (float4){halfX, 0, 0, 0}, + (float4){0, -halfY, 0, 0}, + (float4){0, 0, 1, 0}, + (float4){halfX, halfY, 0, 1}, + }; + pixelToClipTfm = inverse(pixelToClipTfm); + + cursor = pixelToClipTfm * cursor; + + //float4 clipPoint; + //clipPoint.x = (point.x - halfX) / halfX; + //clipPoint.y = -(point.y - halfY) / halfY; + + // convert point in window to point in texture + float4x4 mInv = inverse(projectionViewModelMatrix); + + float4 pixel = mInv * float4m(cursor.x, cursor.y, 1.0f, 1.0f); + pixel.xyz /= pixel.w; // in case perspective used + + float ar = _showSettings->imageAspectRatio(); + + // that's in model space (+/0.5f * ar, +/0.5f), so convert to texture space + pixel.x = (pixel.x / ar + 0.5f); + pixel.y = (-pixel.y + 0.5f); + + //pixel.x *= 0.999f; + //pixel.y *= 0.999f; + + 
float2 uv = pixel.xy; + + // pixels are 0 based + pixel.x *= _showSettings->imageBoundsX; + pixel.y *= _showSettings->imageBoundsY; + + // TODO: finish this logic, need to account for gaps too, and then isolate to + // a given level and mip to sample + // if (_showSettings->isShowingAllLevelsAndMips) { + // pixel.x *= _showSettings->totalChunks(); + // pixel.y *= _showSettings->mipCount; + // } + + // TODO: clearing out the last px visited makes it hard to gather data + // put value on clipboard, or allow click to lock the displayed pixel and + // value. Might just change header to px(last): ... + string text; + + bool outsideImageBounds = + pixel.x < 0.0f || pixel.x >= (float)_showSettings->imageBoundsX || + pixel.y < 0.0f || pixel.y >= (float)_showSettings->imageBoundsY; + + // only display pixel if over image + if (outsideImageBounds) { + sprintf(text, "canvas: %d %d\n", (int32_t)pixel.x, (int32_t)pixel.y); + setEyedropperText(text.c_str()); // ick + _showSettings->outsideImageBounds = true; + } + else { + // Note: fromView: nil returns isFlipped coordinate, fromView:self flips it + // back. + + int32_t newX = (int32_t)pixel.x; + int32_t newY = (int32_t)pixel.y; + + if (_showSettings->outsideImageBounds || + (_showSettings->textureLookupX != newX || + _showSettings->textureLookupY != newY)) { + // Note: this only samples from the original texture via compute shaders + // so preview mode pixel colors are not conveyed. But can see underlying + // data driving preview. 
+ + _showSettings->outsideImageBounds = false; + + // %.0f rounds the value, but want truncation + _showSettings->textureLookupX = newX; + _showSettings->textureLookupY = newY; + + // show block num + int mipLOD = _showSettings->mipNumber; + + int mipX = _showSettings->imageBoundsX; + int mipY = _showSettings->imageBoundsY; + + mipX = mipX >> mipLOD; + mipY = mipY >> mipLOD; + + mipX = std::max(1, mipX); + mipY = std::max(1, mipY); + + mipX = (int32_t)(uv.x * mipX); + mipY = (int32_t)(uv.y * mipY); + + // Has to be set in other call, not here + _showSettings->textureLookupMipX = mipX; + _showSettings->textureLookupMipY = mipY; + + // showEyedropperData(uv); + } + } +} + +bool Data::handleEventAction(const Action* action, bool isShiftKeyDown, ActionState& actionState) +{ + // Some data depends on the texture data (isSigned, isNormal, ..) + bool isChanged = false; + bool isStateChanged = false; + + // TODO: fix isChanged to only be set when value changes + // f.e. clamped values don't need to re-render + string text; + + if (action == _actionVertical) { + _showSettings->isVerticalUI = !_showSettings->isVerticalUI; + text = _showSettings->isVerticalUI ? "Vert UI" : "Horiz UI"; + + // just to update toggle state to Off + isStateChanged = true; + } + else if (action == _actionHideUI) { + // this means no image loaded yet + if (_noImageLoaded) { + return true; + } + + _showSettings->isHideUI = !_showSettings->isHideUI; + text = _showSettings->isHideUI ? 
"Hide UI" : "Show UI"; + + // just to update toggle state to Off + isStateChanged = true; + } + + else if (action == _actionR) { + if (!action->isHidden) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::ModeR001) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::ModeR001; + text = "Mask R001"; + } + isChanged = true; + } + } + else if (action == _actionG) { + if (!action->isHidden) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode0G01) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::Mode0G01; + text = "Mask 0G01"; + } + isChanged = true; + } + } + else if (action == _actionB) { + if (!action->isHidden) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode00B1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::Mode00B1; + text = "Mask 00B1"; + } + + isChanged = true; + } + } + else if (action == _actionA) { + if (!action->isHidden) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::ModeAAA1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::ModeAAA1; + text = "Mask AAA1"; + } + + isChanged = true; + } + } + else if (action == _actionPerf) { + Perf* perf = Perf::instance(); + + bool isCompressed = true; + if ((!_showSettings->isPerf) && perf->start("kramv", isCompressed)) { + _showSettings->isPerf = true; + } + else { + _showSettings->isPerf = false; + + if (perf->isRunning()) { + perf->stop(); + + // TODO: Only open in non-sandboxed builds, it calls system("open file") + // and this will have quarantine flag set if app not in app store + // or notarized, signed, sandboxed for distribution outside of app store + perf->openPerftrace(); + } + } + + text = "Perf "; 
+ text += _showSettings->isPerf ? "On" : "Off"; + isChanged = true; + } + else if (action == _actionPlay) { + if (!action->isHidden) { + _showSettings->isPlayAnimations = !_showSettings->isPlayAnimations; + + //Renderer* renderer = (Renderer*)self.delegate; + //renderer.playAnimations = !renderer.playAnimations; + + text = _showSettings->isPlayAnimations ? "Play" : "Pause"; + isChanged = true; + } + } + else if (action == _actionShapeUVPreview) { + // toggle state + _showSettings->isUVPreview = !_showSettings->isUVPreview; + text = _showSettings->isUVPreview ? "Show UVPreview" : "Hide UvPreview"; + isChanged = true; + + _showSettings->uvPreviewFrames = 10; + } + + else if (action == _actionShapeChannel) { + _showSettings->advanceShapeChannel(isShiftKeyDown); + + text = _showSettings->shapeChannelText(); + isChanged = true; + } + else if (action == _actionLighting) { + _showSettings->advanceLightingMode(isShiftKeyDown); + text = _showSettings->lightingModeText(); + isChanged = true; + } + else if (action == _actionTangent) { + _showSettings->useTangent = !_showSettings->useTangent; + if (_showSettings->useTangent) + text = "Vertex Tangents"; + else + text = "Fragment Tangents"; + isChanged = true; + } + else if (action == _actionDebug) { + _showSettings->advanceDebugMode(isShiftKeyDown); + text = _showSettings->debugModeText(); + isChanged = true; + } + else if (action == _actionHelp) { + // display the chars for now + text = + "1234-rgba, Preview, Debug, A-show all\n" + "Info, Hud, Reload, 0-fit\n" + "Checker, Grid\n" + "Wrap, 8-signed, 9-premul\n" + "Mip, Face, Y-array\n" + "↓-next item, →-next counterpart\n" + "Lighting, S-shape, C-shape channel\n"; + + // just to update toggle state to Off + isStateChanged = true; + } + + else if (action == _actionFit) { + float zoom; + // fit image or mip + if (isShiftKeyDown) { + zoom = 1.0f; + } + else { + // fit to topmost image + zoom = _showSettings->zoomFit; + } + + // This zoom needs to be checked against zoom limits + 
// there's a cap on the zoom multiplier. + // This is reducing zoom which expands the image. + zoom *= 1.0f / (1 << _showSettings->mipNumber); + + // even if zoom same, still do this since it resets the pan + _showSettings->zoom = zoom; + + _showSettings->panX = 0.0f; + _showSettings->panY = 0.0f; + + text = "Scale Image\n"; + // if (doPrintPanZoom) { + // string tmp; + // sprintf(tmp, + // "Pan %.3f,%.3f\n" + // "Zoom %.2fx\n", + // _showSettings->panX, _showSettings->panY, _showSettings->zoom); + // text += tmp; + // } + + isChanged = true; + } + // reload key (also a quick way to reset the settings) + else if (action == _actionReload) { + //bool success = + _delegate.loadFile(); + + // reload at actual size + if (isShiftKeyDown) { + _showSettings->zoom = 1.0f; + } + + // Name change if image + if (_showSettings->isModel) + text = "Reload Model\n"; + else + text = "Reload Image\n"; + // if (doPrintPanZoom) { + // string tmp; + // sprintf(tmp, + // "Pan %.3f,%.3f\n" + // "Zoom %.2fx\n", + // _showSettings->panX, _showSettings->panY, _showSettings->zoom); + // text += tmp; + // } + + isChanged = true; + } + else if (action == _actionPreview) { + _showSettings->isPreview = !_showSettings->isPreview; + isChanged = true; + text = "Preview "; + text += _showSettings->isPreview ? "On" : "Off"; + } + else if (action == _actionDiff) { + _showSettings->isDiff = !_showSettings->isDiff; + isChanged = true; + text = "Diff "; + text += _showSettings->isDiff ? "On" : "Off"; + } + // TODO: might switch c to channel cycle, so could just hit that + // and depending on the content, it cycles through reasonable channel masks + + // toggle checkerboard for transparency + else if (action == _actionChecker) { + if (!action->isHidden) { + _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; + isChanged = true; + text = "Checker "; + text += _showSettings->isCheckerboardShown ? 
"On" : "Off"; + } + } + + else if (action == _actionSrgb) { + if (!action->isHidden) { + _showSettings->isSRGBShown = !_showSettings->isSRGBShown; + + sprintf(text, "Format srgb %s", _showSettings->isSRGBShown ? "On" : "Off"); + + isChanged = true; + } + } + + // toggle pixel grid when magnified above 1 pixel, can happen from mipmap + // changes too + else if (action == _actionGrid) { + static int grid = 0; + static const int kNumGrids = 7; + +#define advanceGrid(g, dec) \ + grid = (grid + kNumGrids + (dec ? -1 : 1)) % kNumGrids + + // if block size is 1, then this shouldn't toggle + _showSettings->isBlockGridShown = false; + _showSettings->isAtlasGridShown = false; + _showSettings->isPixelGridShown = false; + + advanceGrid(grid, isShiftKeyDown); + + static const uint32_t gridSizes[kNumGrids] = { + 0, 1, 4, 32, 64, 128, 256 // grid sizes + }; + + if (grid == 0) { + sprintf(text, "Grid Off"); + } + else if (grid == 1) { + _showSettings->isPixelGridShown = true; + + sprintf(text, "Pixel Grid 1x1"); + } + else if (grid == 2 && _showSettings->blockX > 1) { + _showSettings->isBlockGridShown = true; + + sprintf(text, "Block Grid %dx%d", _showSettings->blockX, + _showSettings->blockY); + } + else { + _showSettings->isAtlasGridShown = true; + + // want to be able to show altases tht have long entries derived from + // props but right now just a square grid atlas + _showSettings->gridSizeX = _showSettings->gridSizeY = gridSizes[grid]; + + sprintf(text, "Atlas Grid %dx%d", _showSettings->gridSizeX, + _showSettings->gridSizeY); + } + + isChanged = true; + } + else if (action == _actionShowAll) { + if (!action->isHidden) { + // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips + _showSettings->isShowingAllLevelsAndMips = + !_showSettings->isShowingAllLevelsAndMips; + isChanged = true; + text = "Show All "; + text += _showSettings->isShowingAllLevelsAndMips ? 
"On" : "Off"; + } + } + + // toggle hud that shows name and pixel value under the cursor + // this may require calling setNeedsDisplay on the UILabel as cursor moves + else if (action == _actionHud) { + _showSettings->isHudShown = !_showSettings->isHudShown; + //[self updateHudVisibility]; + // isChanged = true; + text = "Hud "; + text += _showSettings->isHudShown ? "On" : "Off"; + isStateChanged = true; + } + + // info on the texture, could request info from lib, but would want to cache + // that info + else if (action == _actionInfo) { + if (_showSettings->isHudShown) { + // also hide the file table, since this can be long + //[self hideFileTable]; + + sprintf(text, "%s", + isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() + : _showSettings->imageInfo.c_str()); + } + // just to update toggle state to Off + isStateChanged = true; + } + + // toggle wrap/clamp + else if (action == _actionWrap) { + // TODO: cycle through all possible modes (clamp, repeat, mirror-once, + // mirror-repeat, ...) + _showSettings->isWrap = !_showSettings->isWrap; + isChanged = true; + text = "Wrap "; + text += _showSettings->isWrap ? "On" : "Off"; + } + + // toggle signed vs. unsigned + else if (action == _actionSigned) { + if (!action->isHidden) { + _showSettings->isSigned = !_showSettings->isSigned; + isChanged = true; + text = "Signed "; + text += _showSettings->isSigned ? "On" : "Off"; + } + } + + // toggle premul alpha vs. unmul + else if (action == _actionPremul) { + if (!action->isHidden) { + _showSettings->doShaderPremul = !_showSettings->doShaderPremul; + isChanged = true; + text = "Premul "; + text += _showSettings->doShaderPremul ? 
"On" : "Off"; + } + } + + else if (action == _actionItem || action == _actionPrevItem) { + if (!action->isHidden) { + // invert shift key for prev, since it's reverse + if (action == _actionPrevItem) { + isShiftKeyDown = !isShiftKeyDown; + } + + if (advanceFile(!isShiftKeyDown)) { + //_hudHidden = true; + //[self updateHudVisibility]; + //[self setEyedropperText:""]; + + isChanged = true; + + setLoadedText(text); + } + } + } + + else if (action == _actionCounterpart || action == _actionPrevCounterpart) { + if (!action->isHidden) { + // invert shift key for prev, since it's reverse + if (action == _actionPrevCounterpart) { + isShiftKeyDown = !isShiftKeyDown; + } + if (advanceCounterpart(!isShiftKeyDown)) { + //_hudHidden = true; + //[self updateHudVisibility]; + //[self setEyedropperText:""]; + + isChanged = true; + + setLoadedText(text); + } + } + } + + // test out different shapes + else if (action == _actionShapeMesh) { + if (_showSettings->meshCount > 1) { + _showSettings->advanceMeshNumber(isShiftKeyDown); + text = _showSettings->meshNumberText(); + isChanged = true; + } + } + + // TODO: should probably have these wrap and not clamp to count limits + + // mip up/down + else if (action == _actionMip) { + if (_showSettings->mipCount > 1) { + if (isShiftKeyDown) { + _showSettings->mipNumber = std::max(_showSettings->mipNumber - 1, 0); + } + else { + _showSettings->mipNumber = + std::min(_showSettings->mipNumber + 1, _showSettings->mipCount - 1); + } + sprintf(text, "Mip %d/%d", _showSettings->mipNumber, + _showSettings->mipCount); + isChanged = true; + } + } + + else if (action == _actionFace) { + // cube or cube array, but hit s to pick cubearray + if (_showSettings->faceCount > 1) { + if (isShiftKeyDown) { + _showSettings->faceNumber = std::max(_showSettings->faceNumber - 1, 0); + } + else { + _showSettings->faceNumber = + std::min(_showSettings->faceNumber + 1, _showSettings->faceCount - 1); + } + sprintf(text, "Face %d/%d", _showSettings->faceNumber, + 
_showSettings->faceCount); + isChanged = true; + } + } + + else if (action == _actionArray) { + // slice + if (_showSettings->sliceCount > 1) { + if (isShiftKeyDown) { + _showSettings->sliceNumber = std::max(_showSettings->sliceNumber - 1, 0); + } + else { + _showSettings->sliceNumber = + std::min(_showSettings->sliceNumber + 1, _showSettings->sliceCount - 1); + } + sprintf(text, "Slice %d/%d", _showSettings->sliceNumber, + _showSettings->sliceCount); + isChanged = true; + } + // array + else if (_showSettings->arrayCount > 1) { + if (isShiftKeyDown) { + _showSettings->arrayNumber = std::max(_showSettings->arrayNumber - 1, 0); + } + else { + _showSettings->arrayNumber = + std::min(_showSettings->arrayNumber + 1, _showSettings->arrayCount - 1); + } + sprintf(text, "Array %d/%d", _showSettings->arrayNumber, + _showSettings->arrayCount); + isChanged = true; + } + } + else { + // non-handled action + return false; + } + + actionState.hudText = text; + actionState.isChanged = isChanged; + actionState.isStateChanged = isStateChanged; + + return true; +} + +// only called on new or modstamp-changed image +void Data::updateImageSettings(const string& fullFilename, KTXImage& image, MyMTLPixelFormat format) +{ + _showSettings->isModel = false; + + // format may be trancoded to gpu-friendly format + MyMTLPixelFormat originalFormat = image.pixelFormat; + + _showSettings->blockX = image.blockDims().x; + _showSettings->blockY = image.blockDims().y; + + _showSettings->isSigned = isSignedFormat(format); + + TexContentType texContentType = findContentTypeFromFilename(fullFilename.c_str()); + _showSettings->texContentType = texContentType; + //_showSettings->isSDF = isSDF; + + // textures are already premul, so don't need to premul in shader + // should really have 3 modes, unmul, default, premul + bool isPNG = isPNGFilename(fullFilename.c_str()); + + _showSettings->isPremul = image.isPremul(); + _showSettings->doShaderPremul = false; + if (texContentType == TexContentTypeAlbedo && 
isPNG) { + _showSettings->doShaderPremul = + true; // convert to premul in shader, so can see other channels + } + + int32_t numChannels = numChannelsOfFormat(originalFormat); + _showSettings->numChannels = numChannels; + + // TODO: identify if texture holds normal data from the props + // have too many 4 channel normals that shouldn't swizzle like this + // kramTextures.py is using etc2rg on iOS for now, and not astc. + + _showSettings->isSwizzleAGToRG = false; + + // For best sdf and normal reconstruct from ASTC or BC3, must use RRR1 and + // GGGR or RRRG BC1nm multiply r*a in the shader, but just use BC5 anymore. + // if (isASTCFormat(originalFormat) && isNormal) { + // // channels after = "ag01" + // _showSettings->isSwizzleAGToRG = true; + // } + + // can derive these from texture queries + _showSettings->mipCount = (int32_t)image.mipLevels.size(); + _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || + image.textureType == MyMTLTextureTypeCubeArray) + ? 6 + : 0; + _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; + _showSettings->sliceCount = (int32_t)image.depth; + + _showSettings->imageBoundsX = (int32_t)image.width; + _showSettings->imageBoundsY = (int32_t)image.height; +} + +float zoom3D = 1.0f; + +void Data::updateProjTransform() +{ + // Want to move to always using perspective even for 2d images, but still more math + // to work out to keep zoom to cursor working. +#if USE_PERSPECTIVE + float aspect = _showSettings->viewSizeX / (float)_showSettings->viewSizeY; + _projectionMatrix = perspective_rhcs(90.0f * (M_PI / 180.0f), aspect, 0.1f); + + // This was used to reset zoom to a baseline that had a nice zoom. But little connected to it now. + // Remember with rotation, the bounds can hit the nearClip. Note all shapes are 0.5 radius, + // so at 1 this is 2x to leave gap around the shape for now. 
+ float shapeHeightInY = 1; + _showSettings->zoomFit = shapeHeightInY; // / (float)_showSettings->viewSizeY; + +#else + + if (_showSettings->isModel) { + float aspect = _showSettings->viewSizeX / (float)_showSettings->viewSizeY; + _projectionMatrix = perspective_rhcs(90.0f * (M_PI / 180.0f), aspect, 0.1f); + + _showSettings->zoomFit = 1; + } + else { + // ltrb + float2 rectDims = 0.5f * float2m(_showSettings->viewSizeX, _showSettings->viewSizeY); + float4 rect = float4m(-rectDims.x, rectDims.y, + rectDims.x, -rectDims.y); + + _projectionMatrix = + orthographic_rhcs(rect, 0.1f, 1e6f); + + // DONE: adjust zoom to fit the entire image to the window + _showSettings->zoomFit = + std::min((float)_showSettings->viewSizeX, (float)_showSettings->viewSizeY) / + std::max(1.0f, std::max((float)_showSettings->imageBoundsX, + (float)_showSettings->imageBoundsY)); + + static bool useImageAndViewBounds = true; + if (useImageAndViewBounds) { + float invWidth = 1.0f / std::max(1.0f, (float)_showSettings->imageBoundsX); + float invHeight = 1.0f / std::max(1.0f, (float)_showSettings->imageBoundsY); + + // DONE: adjust zoom to fit the entire image to the window + // the best fit depends on dimension of image and window + _showSettings->zoomFit = + std::min((float)_showSettings->viewSizeX * invWidth, + (float)_showSettings->viewSizeY * invHeight); + } + } +#endif +} + +void Data::resetSomeImageSettings(bool isNewFile) +{ + // only reset these on new texture, but have to revalidate + if (isNewFile) { + // then can manipulate this after loading + _showSettings->mipNumber = 0; + _showSettings->faceNumber = 0; + _showSettings->arrayNumber = 0; + _showSettings->sliceNumber = 0; + + _showSettings->channels = TextureChannels::ModeRGBA; + + // wish could keep existing setting, but new texture might not + // be supported debugMode for new texture + _showSettings->debugMode = DebugMode::DebugModeNone; + + _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + } + else { + // reloaded 
file may have different limits + _showSettings->mipNumber = + std::min(_showSettings->mipNumber, _showSettings->mipCount); + _showSettings->faceNumber = + std::min(_showSettings->faceNumber, _showSettings->faceCount); + _showSettings->arrayNumber = + std::min(_showSettings->arrayNumber, _showSettings->arrayCount); + _showSettings->sliceNumber = + std::min(_showSettings->sliceNumber, _showSettings->sliceCount); + } + + updateProjTransform(); + + // this controls viewMatrix (global to all visible textures) + _showSettings->panX = 0.0f; + _showSettings->panY = 0.0f; + + _showSettings->zoom = _showSettings->zoomFit; + + // Y is always 1.0 on the plane, so scale to imageBoundsY + // plane is already a non-uniform size, so can keep uniform scale + + // have one of these for each texture added to the viewer + //float scaleX = MAX(1, _showSettings->imageBoundsX); + float scaleY = std::max(1, _showSettings->imageBoundsY); + float scaleX = scaleY; + float scaleZ = scaleY; + + _modelMatrix2D = + float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // uniform scale + _modelMatrix2D = _modelMatrix2D * + translation(float3m(0.0f, 0.0f, -1.0)); // set z=-1 unit back + + // uniform scaled 3d primitive + float scale = scaleY; // MAX(scaleX, scaleY); + + // store the zoom into thew view matrix + // fragment tangents seem to break down at high model scale due to precision + // differences between worldPos and uv + // static bool useZoom3D = false; + // if (useZoom3D) { + // zoom3D = scale; // * _showSettings->viewSizeX / 2.0f; + // scale = 1.0; + // } + + _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale + _modelMatrix3D = + _modelMatrix3D * + translation(float3m(0.0f, 0.0f, -1.0f)); // set z=-1 unit back +} + +void Data::updateTransforms() +{ + // scale + float zoom = _showSettings->zoom; + + // translate + float4x4 panTransform = + translation(float3m(-_showSettings->panX, _showSettings->panY, 0.0)); + + if (_showSettings->is3DView) { + _viewMatrix3D = 
float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform + _viewMatrix3D = panTransform * _viewMatrix3D; + + _viewMatrix = _viewMatrix3D; + + // obj specific + _modelMatrix = _modelMatrix3D; + } + else { + _viewMatrix2D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + _viewMatrix2D = panTransform * _viewMatrix2D; + + _viewMatrix = _viewMatrix2D; + + // obj specific + _modelMatrix = _modelMatrix2D; + } + + // viewMatrix should typically be the inverse + //_viewMatrix = simd_inverse(_viewMatrix); + + _projectionViewMatrix = _projectionMatrix * _viewMatrix; + + // cache the camera position + _cameraPosition = + inverse(_viewMatrix).columns[3].xyz; // this is all ortho + + // obj specific + float4 modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); + _showSettings->isInverted = modelMatrixInvScale2.w < 0.0f; +} + +float4x4 Data::computeImageTransform(float panX, float panY, float zoom) +{ + // translate + float4x4 panTransform = translation(float3m(-panX, panY, 0.0)); + + // non-uniform scale is okay here, only affects ortho volume + // setting this to uniform zoom and object is not visible, zoom can be 20x in + // x and y + if (_showSettings->is3DView) { + zoom *= zoom3D; + } + + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + viewMatrix = panTransform * viewMatrix; + + // scale + if (_showSettings->is3DView) { + return _projectionMatrix * viewMatrix * _modelMatrix3D; + } + else { + return _projectionMatrix * viewMatrix * _modelMatrix2D; + } +} + +void Data::doZoomMath(float newZoom, float2& newPan) +{ + // transform the cursor to texture coordinate, or clamped version if outside + float4x4 projectionViewModelMatrix = computeImageTransform( + _showSettings->panX, + _showSettings->panY, + _showSettings->zoom); + + // convert from pixel to clip space + float halfX = _showSettings->viewSizeX * 0.5f; + float halfY = _showSettings->viewSizeY * 0.5f; + + // sometimes get viewSizeX that's scaled by retina, and other times not. 
+ // account for contentScaleFactor (viewSizeX is 2x bigger than cursorX on + // retina display) now passing down drawableSize instead of view.bounds.size + halfX /= (float)_showSettings->viewContentScaleFactor; + halfY /= (float)_showSettings->viewContentScaleFactor; + + float4x4 viewportMatrix = + { + (float4){halfX, 0, 0, 0}, + (float4){0, -halfY, 0, 0}, + (float4){0, 0, 1, 0}, + (float4){halfX, halfY, 0, 1}, + }; + viewportMatrix = inverse(viewportMatrix); + + float4 cursor = float4m(_showSettings->cursorX, _showSettings->cursorY, 0.0f, 1.0f); + + cursor = viewportMatrix * cursor; + + //NSPoint clipPoint; + //clipPoint.x = (point.x - halfX) / halfX; + //clipPoint.y = -(point.y - halfY) / halfY; + + // convert point in window to point in model space + float4x4 mInv = inverse(projectionViewModelMatrix); + + float4 pixel = mInv * float4m(cursor.x, cursor.y, 1.0f, 1.0f); + pixel.xyz /= pixel.w; // in case perspective used + + // allow pan to extend to show all + float ar = _showSettings->imageAspectRatio(); + float maxX = 0.5f * ar; + float minY = -0.5f; + if (_showSettings->isShowingAllLevelsAndMips) { + maxX += ar * 1.0f * (_showSettings->totalChunks() - 1); + minY -= 1.0f * (_showSettings->mipCount - 1); + } + + // X bound may need adjusted for ar ? 
+ // that's in model space (+/0.5f, +/0.5f), so convert to texture space + pixel.x = std::clamp(pixel.x, -0.5f * ar, maxX); + pixel.y = std::clamp(pixel.y, minY, 0.5f); + + // now that's the point that we want to zoom towards + // No checks on this zoom + // old - newPosition from the zoom + + // normalized coords to pixel coords + pixel.x *= _showSettings->imageBoundsX; + pixel.y *= _showSettings->imageBoundsY; + + // this fixes pinch-zoom on cube which are 6:1 + pixel.x /= ar; + +#if USE_PERSPECTIVE + // TODO: this doesn't work for perspective + newPan.x = _showSettings->panX - (_showSettings->zoom - newZoom) * pixel.x; + newPan.y = _showSettings->panY + (_showSettings->zoom - newZoom) * pixel.y; +#else + newPan.x = _showSettings->panX - (_showSettings->zoom - newZoom) * pixel.x; + newPan.y = _showSettings->panY + (_showSettings->zoom - newZoom) * pixel.y; +#endif } -} // namespace kram +} // namespace kram diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 146c01c0..ddd9ef34 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -1,10 +1,10 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
#include -#include "KramLib.h" // for MyMTLPixelFormat +#include "KramLib.h" // for MyMTLPixelFormat //#include //#include @@ -12,8 +12,8 @@ namespace kram { -using namespace NAMESPACE_STL; -using namespace simd; +using namespace STL_NAMESPACE; +using namespace SIMD_NAMESPACE; enum TextureChannels { ModeRGBA = 0, @@ -53,13 +53,13 @@ enum ShapeChannel { ShapeChannelUV0, - ShapeChannelFaceNormal, // gen from dfdx and dfdy + ShapeChannelFaceNormal, // gen from dfdx and dfdy - ShapeChannelNormal, // vertex normal + ShapeChannelNormal, // vertex normal ShapeChannelTangent, ShapeChannelBitangent, - ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor + ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor // don't need bump, since can already see it, but what if combined diffuse + // normal ShapeChannelBumpNormal, @@ -68,13 +68,23 @@ enum ShapeChannel { }; enum LightingMode { - LightingModeDiffuse = 0, // amb + diffuse - LightingModeSpecular = 1, // amb + diffuse + specular - LightingModeNone = 2, // no lighting, just mips - + LightingModeDiffuse = 0, // amb + diffuse + LightingModeSpecular = 1, // amb + diffuse + specular + LightingModeNone = 2, // no lighting, just mips + LightingModeCount, }; +struct Atlas { + string name; + float x, y, w, h; + float u, v; // padding - to both or just left or right? + bool isVertical; + uint32_t level; + + float4 rect() const { return float4m(x, y, w, h); } +}; + class ShowSettings { public: // Can mask various channels (r/g/b/a only, vs. 
all), may also add toggle of @@ -102,6 +112,14 @@ class ShowSettings { // DONE: hook all these up to shader and view bool isHudShown = true; + bool isHideUI = false; + bool isVerticalUI = true; + + bool isPlayAnimations = false; + + // Can get a dump of perf (mostly loading a decode/transcode perf) + bool isPerf = false; + // transparency checkboard under the image bool isCheckerboardShown = false; @@ -123,21 +141,22 @@ class ShowSettings { //bool isNormal = false; bool isSigned = false; - bool isPremul = false; // needed for png which only holds unmul + bool isPremul = false; // copy of whether image.isPremul() + bool doShaderPremul = false; // needed for png which only holds unmul bool isSwizzleAGToRG = false; //bool isSDF = false; TexContentType texContentType = TexContentTypeUnknown; - + // this mode shows the content with lighting or with bilinear/mips active bool isPreview = false; // Can collapse 3d to 2d and overlay the uv bool isUVPreview = false; - + uint32_t uvPreviewFrames = 0; float uvPreviewStep = 1.0f / 10.0f; float uvPreview = 0.0f; - + // the 2d view doesn't want to inset pixels for clamp, or point sampling is // thrown off expecially on small 4x4 textures #if USE_PERSPECTIVE @@ -145,27 +164,24 @@ class ShowSettings { #else bool is3DView = false; #endif - + // TODO: Might eliminate this, since mips are either built with or without // srgb and disabling with a MTLView caused many flags to have to be set on // MTLTexture - // bool isSRGBShown = true; + bool isSRGBShown = false; // whether to use normal to tangent (false), or vertex tangents (true) bool useTangent = true; - // draw with reverseZ to better match perspective - bool isReverseZ = true; + // image vs. gltf model + bool isModel = false; - // whether files are pulled from zip archive. 
- bool isArchive = false; + // if diff texture available, can show diff against source + bool isDiff = false; - // whether files are pulled from folder(s) - bool isFolder = false; + // currently loading the diff texture if found, this slows loads + bool hasDiffTexture = false; - // image vs. gltf model - bool isModel = false; - // can sample from drawable or from single source texture bool isEyedropperFromDrawable(); @@ -174,8 +190,10 @@ class ShowSettings { // this could be boundary of all visible images, so that pan doesn't go flying // off to nowhere - int32_t imageBoundsX = 0; // px - int32_t imageBoundsY = 0; // px + int32_t imageBoundsX = 0; // px + int32_t imageBoundsY = 0; // px + + bool outsideImageBounds = false; // size of the block, used in block grid drawing int32_t blockX = 1; @@ -201,8 +219,8 @@ class ShowSettings { float4 textureResult; // size of the view and its contentScaleFactor - int32_t viewSizeX = 1; // px - int32_t viewSizeY = 1; // px + int32_t viewSizeX = 1; // px + int32_t viewSizeY = 1; // px float viewContentScaleFactor = 1.0f; // cursor is in view coordinates, but doesn't include contentScaleFactor @@ -222,7 +240,7 @@ class ShowSettings { LightingMode lightingMode = LightingModeDiffuse; - bool isInverted; + bool isInverted = false; // cached on load, raw info about the texture from libkram string imageInfo; @@ -232,44 +250,345 @@ class ShowSettings { MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; + string windowTitleString(const char* filename) const; + void advanceMeshNumber(bool decrement); void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); void advanceLightingMode(bool decrement); - const char *meshNumberText() const; - const char *shapeChannelText() const; - const char *debugModeText() const; - const char *lightingModeText() const; - - const char *meshNumberName(uint32_t meshNumber) const; - + const char* meshNumberText() const; + const char* shapeChannelText() const; + const char* 
debugModeText() const; + const char* lightingModeText() const; + + const char* meshNumberName(uint32_t meshNumber) const; + void updateUVPreviewState(); - - float imageAspectRatio() const { + + float imageAspectRatio() const + { float ar = 1.0f; if (meshNumber == 0 && !isModel && imageBoundsY > 0) ar = imageBoundsX / (float)imageBoundsY; return ar; } + + bool isFileNew(const char* fullFilename) const + { + return lastFilename != fullFilename; + } + bool isFileChanged(const char* fullFilename, double timestamp) const + { + // Note that modstamp can change, but content data hash may be the same + return isFileNew(fullFilename) || (timestamp != lastTimestamp); + } + string lastFilename; double lastTimestamp = 0.0; int32_t meshNumber = 0; int32_t meshCount = 5; + + const Atlas* lastAtlas = nullptr; // Might move to index + vector atlas; +}; + +void printChannels(string& tmp, const string& label, float4 c, + int32_t numChannels, bool isFloat, bool isSigned); + +enum Key { + A = 0x00, + S = 0x01, + D = 0x02, + F = 0x03, + H = 0x04, + G = 0x05, + Z = 0x06, + X = 0x07, + C = 0x08, + V = 0x09, + B = 0x0B, + Q = 0x0C, + W = 0x0D, + E = 0x0E, + R = 0x0F, + Y = 0x10, + T = 0x11, + O = 0x1F, + U = 0x20, + I = 0x22, + P = 0x23, + L = 0x25, + J = 0x26, + K = 0x28, + N = 0x2D, + M = 0x2E, + + // https://eastmanreference.com/complete-list-of-applescript-key-codes + Num1 = 0x12, + Num2 = 0x13, + Num3 = 0x14, + Num4 = 0x15, + Num5 = 0x17, + Num6 = 0x16, + Num7 = 0x1A, + Num8 = 0x1C, + Num9 = 0x19, + Num0 = 0x1D, + + LeftBrace = 0x21, + RightBrace = 0x1E, + + LeftBracket = 0x21, + RightBracket = 0x1E, + + Quote = 0x27, + Semicolon = 0x29, + Backslash = 0x2A, + Comma = 0x2B, + Slash = 0x2C, + + LeftArrow = 0x7B, + RightArrow = 0x7C, + DownArrow = 0x7D, + UpArrow = 0x7E, + + Space = 0x31, + Escape = 0x35, }; -float4x4 matrix4x4_translation(float tx, float ty, float tz); +using kram_id = void*; -float4x4 perspective_rhs(float fovyRadians, float aspect, float nearZ, float - farZ, bool 
isReverseZ); +// This makes dealing with ui much simpler +class Action { +public: + Action(const char* icon_, const char* tip_, Key keyCode_) + : icon(icon_), tip(tip_), keyCode(keyCode_) {} -float4x4 orthographic_rhs(float width, float height, float nearZ, float farZ, - bool isReverseZ); + const char* icon; + const char* tip; -float4x4 matrix4x4_rotation(float radians, vector_float3 axis); + // Note these are not ref-counted, but AppKit already does + kram_id button; // NSButton* + kram_id menuItem; // NSMenuItem* + Key keyCode; -void printChannels(string &tmp, const string &label, float4 c, - int32_t numChannels, bool isFloat, bool isSigned); + bool isHighlighted = false; + bool isHidden = false; + bool isButtonDisabled = false; + + // This have platform impl + void setHighlight(bool enable); + void setHidden(bool enable); + void disableButton(); +}; + +// This is an open archive +struct FileContainer { + // allow zip files to be dropped and opened, and can advance through bundle + // content. + + // TODO: Add FileHelper if acrhive file is networked, but would require + // full load to memory. 
+ + ZipHelper zip; + MmapHelper zipMmap; +}; + +struct ActionState { + string hudText; + bool isChanged; + bool isStateChanged; +}; + +enum TextSlot { + kTextSlotHud, + kTextSlotEyedropper, + kTextSlotAtlas, + + kTextSlotCount // not a slot +}; + +struct File { +public: + File(const char* name_, int32_t urlIndex_); + + // Note: not sorting by urlIndex currently + bool operator<(const File& rhs) const + { + // sort by shortname + int compare = strcasecmp(nameShort.c_str(), rhs.nameShort.c_str()); + if (compare != 0) + return compare < 0; + + // if equal, then sort by longname + return strcasecmp(name.c_str(), rhs.name.c_str()) < 0; + } + +public: + string name; + int32_t urlIndex; + string nameShort; // would alias name, but too risky +}; + +// This allows wrapping all the ObjC stuff +struct DataDelegate { + bool loadFile(bool clear = false); + + bool loadModelFile(const char* filename); + + bool loadTextureFromImage(const char* fullFilename, double timestamp, KTXImage& image, KTXImage* imageNormal, KTXImage* imageDiff, bool isArchive); + +public: + kram_id view; // MyMTKView* +}; + +struct Data { + Data(); + ~Data(); + + void clearAtlas(); + bool loadAtlasFile(const char* filename); + bool listFilesInArchive(int32_t urlIndex); + bool openArchive(const char* zipFilename, int32_t urlIndex); + + bool hasCounterpart(bool increment); + bool advanceCounterpart(bool increment); + bool advanceFile(bool increment); + + bool findFilename(const string& filename); + bool findFilenameShort(const string& filename); + const File* findFileShort(const string& filename); + const Atlas* findAtlasAtUV(float2 uv); + bool isArchive() const; + bool loadFile(); + + bool handleEventAction(const Action* action, bool isShiftKeyDown, ActionState& actionState); + void updateUIAfterLoad(); + void updateUIControlState(); + + const Action* actionFromMenu(kram_id menuItem) const; + const Action* actionFromButton(kram_id button) const; + const Action* actionFromKey(uint32_t keyCodes) const; + + 
void setLoadedText(string& text); + void setFailedText(const string& filename, string& text); + + void initActions(); + vector& actions() { return _actions; } + void initDisabledButtons(); + + string textFromSlots(bool isFileListHidden) const; + void setTextSlot(TextSlot slot, const char* text); + + void loadFilesFromUrls(vector& urls, bool skipSubdirs); + void listArchivesInFolder(const string& archiveFilename, vector& archiveFiles, bool skipSubdirs); + void listFilesInFolder(const string& folderFilename, int32_t urlIndex, bool skipSubdirs); + + // See these to split off ObjC code + DataDelegate _delegate; + + void updateEyedropper(); + + float4x4 computeImageTransform(float panX, float panY, float zoom); + void updateProjTransform(); + void resetSomeImageSettings(bool isNewFile); + void updateImageSettings(const string& fullFilename, KTXImage& image, MyMTLPixelFormat format); + + void doZoomMath(float newZoom, float2& newPan); + + void setPerfDirectory(const char* directory); + +private: + bool loadFileFromArchive(); + +public: + void showEyedropperData(const float2& uv); + void setEyedropperText(const char* text); + void setAtlasText(const char* text); + void updateTransforms(); + + //---------------- + float4x4 _projectionMatrix; + + float4x4 _projectionViewMatrix; + float3 _cameraPosition; + + float4x4 _viewMatrix; + float4x4 _viewMatrix2D; + float4x4 _viewMatrix3D; + + // object specific + float4x4 _modelMatrix; + //float4 _modelMatrixInvScale2; + float4x4 _modelMatrix2D; + float4x4 _modelMatrix3D; + + //---------------- + + vector _textSlots; + ShowSettings* _showSettings = nullptr; + + bool _noImageLoaded = true; + string _archiveName; // archive or blank + + // folders and archives and multi-drop files are filled into this + vector _files; + int32_t _fileIndex = 0; + + // One of these per url in _urls + vector _containers; + vector _urls; + + Action* _actionPlay; + Action* _actionShapeUVPreview; + Action* _actionHelp; + Action* _actionInfo; + Action* 
_actionHud; + Action* _actionShowAll; + + Action* _actionPreview; + Action* _actionWrap; + Action* _actionPremul; + Action* _actionSigned; + Action* _actionSrgb; + Action* _actionPerf; + + Action* _actionDiff; + Action* _actionDebug; + Action* _actionGrid; + Action* _actionChecker; + Action* _actionHideUI; + Action* _actionVertical; + + Action* _actionMip; + Action* _actionFace; + Action* _actionArray; + Action* _actionItem; + Action* _actionPrevItem; + Action* _actionCounterpart; + Action* _actionPrevCounterpart; + Action* _actionReload; + Action* _actionFit; + + Action* _actionShapeMesh; + Action* _actionShapeChannel; + Action* _actionLighting; + Action* _actionTangent; + + Action* _actionR; + Action* _actionG; + Action* _actionB; + Action* _actionA; + + vector _actions; +}; + +bool isSupportedModelFilename(const char* filename); +bool isSupportedArchiveFilename(const char* filename); +bool isSupportedJsonFilename(const char* filename); + +//extern bool doPrintPanZoom; -} // namespace kram +} // namespace kram diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index c5663eb1..3ee1fd22 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
@@ -6,6 +6,7 @@ //@import Cocoa; //@import Metal; //@import MetalKit; +//@import CoreText; #import #import @@ -17,7 +18,8 @@ // C++ #include "KramLib.h" -#include "KramVersion.h" // keep kramv version in sync with libkram +#include "KramVersion.h" // keep kramv version in sync with libkram +#include "TaskSystem.h" //#include "KramMipper.h" @@ -27,159 +29,173 @@ //#include "KramZipHelper.h" //#include "KramImage.h" +#include // for recursive_mutex + #include "KramViewerBase.h" -#ifdef NDEBUG -static bool doPrintPanZoom = false; -#else -static bool doPrintPanZoom = false; -#endif +using mymutex = std::recursive_mutex; +using mylock = std::unique_lock; + +#include -using namespace simd; +using namespace SIMD_NAMESPACE; using namespace kram; -using namespace NAMESPACE_STL; +using namespace STL_NAMESPACE; + +// ktx, ktx2, png, and dds for images +// zip, metallib +// gltf, glb files for models +NSArray* utis = @[ + @"public.directory", + + [UTType typeWithFilenameExtension:@"png"].identifier, + [UTType typeWithFilenameExtension:@"ktx"].identifier, + [UTType typeWithFilenameExtension:@"ktx2"].identifier, + [UTType typeWithFilenameExtension:@"dds"].identifier, + + [UTType typeWithFilenameExtension:@"zip"].identifier, + [UTType typeWithFilenameExtension:@"metallib"].identifier, -bool isSupportedModelFilename(const char* filename) { #if USE_GLTF - return endsWithExtension(filename, ".gltf") || - endsWithExtension(filename, ".glb"); -#else - return false; + [UTType typeWithFilenameExtension:@"gltf"].identifier, + [UTType typeWithFilenameExtension:@"glb"].identifier, +//@"model/gltf+json", +//@"model/gltf+binary" +#endif +#if USE_USD + [UTType typeWithFilenameExtension:@"usd"].identifier, + [UTType typeWithFilenameExtension:@"usd"].identifier, + [UTType typeWithFilenameExtension:@"usda"].identifier, #endif -} -bool isSupportedArchiveFilename(const char* filename) { - return endsWithExtension(filename, ".zip"); -} + // read -atlas.json files + [UTType 
typeWithFilenameExtension:@"json"].identifier +]; +NSDictionary* pasteboardOptions = @{ + // This means only these UTIs can be dropped. + NSPasteboardURLReadingContentsConformToTypesKey : utis -struct MouseData -{ + // Don't use this it prevents folder urls + //, NSPasteboardURLReadingFileURLsOnlyKey: @YES +}; + +struct MouseData { NSPoint originPoint; NSPoint oldPoint; NSPoint newPoint; - + NSPoint pan; }; -// this aliases the existing string, so can't chop extension -inline const char* toFilenameShort(const char* filename) { - const char* filenameShort = strrchr(filename, '/'); - if (filenameShort == nullptr) { - filenameShort = filename; +//------------- + +void Action::setHighlight(bool enable) +{ + isHighlighted = enable; + + auto On = 1; // NSControlStateValueOn; + auto Off = 0; // NSControlStateValueOff; + + if (!isButtonDisabled) { + ((__bridge NSButton*)button).state = enable ? On : Off; } - else { - filenameShort += 1; + ((__bridge NSMenuItem*)menuItem).state = enable ? On : Off; +} + +void Action::setHidden(bool enable) +{ + isHidden = enable; + + if (!isButtonDisabled) { + ((__bridge NSButton*)button).hidden = enable; } - return filenameShort; + ((__bridge NSMenuItem*)menuItem).hidden = enable; } -//------------- +void Action::disableButton() +{ + ((__bridge NSButton*)button).hidden = true; + isButtonDisabled = true; +} -enum Key { - A = 0x00, - S = 0x01, - D = 0x02, - F = 0x03, - H = 0x04, - G = 0x05, - Z = 0x06, - X = 0x07, - C = 0x08, - V = 0x09, - B = 0x0B, - Q = 0x0C, - W = 0x0D, - E = 0x0E, - R = 0x0F, - Y = 0x10, - T = 0x11, - O = 0x1F, - U = 0x20, - I = 0x22, - P = 0x23, - L = 0x25, - J = 0x26, - K = 0x28, - N = 0x2D, - M = 0x2E, - - // https://eastmanreference.com/complete-list-of-applescript-key-codes - Num1 = 0x12, - Num2 = 0x13, - Num3 = 0x14, - Num4 = 0x15, - Num5 = 0x17, - Num6 = 0x16, - Num7 = 0x1A, - Num8 = 0x1C, - Num9 = 0x19, - Num0 = 0x1D, - - LeftBrace = 0x21, - RightBrace = 0x1E, - - LeftBracket = 0x21, - RightBracket = 0x1E, - - 
Quote = 0x27, - Semicolon = 0x29, - Backslash = 0x2A, - Comma = 0x2B, - Slash = 0x2C, - - LeftArrow = 0x7B, - RightArrow = 0x7C, - DownArrow = 0x7D, - UpArrow = 0x7E, - - Space = 0x31, - Escape = 0x35, -}; +// These are using NSFileManager to list files, so must be ObjC +void Data::listArchivesInFolder(const string& folderFilename, vector& archiveFiles, bool skipSubdirs) +{ + NSURL* url = [NSURL fileURLWithPath:[NSString stringWithUTF8String:folderFilename.c_str()]]; + NSDirectoryEnumerationOptions options = NSDirectoryEnumerationSkipsHiddenFiles; + if (skipSubdirs) + options |= NSDirectoryEnumerationSkipsSubdirectoryDescendants; -// This makes dealing with ui much simpler -class Action { -public: - Action(const char* icon_, const char* tip_, Key keyCode_): icon(icon_), tip(tip_), keyCode(keyCode_) {} - - const char* icon; - const char* tip; - - // Note these are not ref-counted, but AppKit already does - id button; // NSButton* - id menuItem; // NSMenuItem* - Key keyCode; - - bool isHighlighted = false; - bool isHidden = false; - bool isButtonDisabled = false; - - void setHighlight(bool enable) { - isHighlighted = enable; - - auto On = NSControlStateValueOn; - auto Off = NSControlStateValueOff; - - if (!isButtonDisabled) { - ((NSButton*)button).state = enable ? On : Off; - } - ((NSMenuItem*)menuItem).state = enable ? 
On : Off; - } - - void setHidden(bool enable) { - isHidden = enable; - - if (!isButtonDisabled) { - ((NSButton*)button).hidden = enable; + NSDirectoryEnumerator* directoryEnumerator = + [[NSFileManager defaultManager] + enumeratorAtURL:url + includingPropertiesForKeys:[NSArray array] + options:options + errorHandler: // nil + ^BOOL(NSURL* urlArg, NSError* error) { + macroUnusedVar(urlArg); + macroUnusedVar(error); + + // handle error + return false; + }]; + + // only display models in folder if found, ignore the png/jpg files + while (NSURL* fileOrDirectoryURL = [directoryEnumerator nextObject]) { + const char* name = fileOrDirectoryURL.fileSystemRepresentation; + + bool isArchive = isSupportedArchiveFilename(name); + if (isArchive) { + archiveFiles.emplace_back(File(name, 0)); } - ((NSMenuItem*)menuItem).hidden = enable; } - - void disableButton() { - ((NSButton*)button).hidden = true; - isButtonDisabled = true; +} + +void Data::listFilesInFolder(const string& archiveFilename, int32_t urlIndex, bool skipSubdirs) +{ + // Hope this has the same permissions + NSURL* url = [NSURL fileURLWithPath:[NSString stringWithUTF8String:archiveFilename.c_str()]]; + + NSDirectoryEnumerationOptions options = NSDirectoryEnumerationSkipsHiddenFiles; + if (skipSubdirs) + options |= NSDirectoryEnumerationSkipsSubdirectoryDescendants; + + NSDirectoryEnumerator* directoryEnumerator = + [[NSFileManager defaultManager] + enumeratorAtURL:url + includingPropertiesForKeys:[NSArray array] + options:options + errorHandler: // nil + ^BOOL(NSURL* urlArg, NSError* error) { + macroUnusedVar(urlArg); + macroUnusedVar(error); + + // handle error - don't change to folder if devoid of valid content + return false; + }]; + + while (NSURL* fileOrDirectoryURL = [directoryEnumerator nextObject]) { + const char* name = fileOrDirectoryURL.fileSystemRepresentation; + + bool isValid = isSupportedFilename(name); + +#if USE_GLTF || USE_USD + // note: many gltf reference jpg which will load via GltfAsset, but + // 
kram and kramv do not import jpg files. + if (!isValid) { + isValid = isSupportedModelFilename(name); + } +#endif + + if (!isValid) { + isValid = isSupportedJsonFilename(name); + } + if (isValid) { + _files.emplace_back(File(name, urlIndex)); + } } -}; +} //------------- @@ -190,9 +206,7 @@ @interface MyNSTextField : NSTextField @end -@implementation MyNSTextField -{ - +@implementation MyNSTextField { } // override to allow clickthrough @@ -206,12 +220,9 @@ - (NSView*)hitTest:(NSPoint)aPoint //------------- @interface MyMTKView : MTKView -// for now only have a single imageURL -@property(retain, nonatomic, readwrite, nullable) NSURL *imageURL; -//@property (nonatomic, readwrite, nullable) NSPanGestureRecognizer* panGesture; @property(retain, nonatomic, readwrite, nullable) - NSMagnificationGestureRecognizer *zoomGesture; + NSMagnificationGestureRecognizer* zoomGesture; @property(nonatomic, readwrite) double lastArchiveTimestamp; @@ -223,12 +234,11 @@ @interface MyMTKView : MTKView @property(nonatomic, readwrite) float validMagnification; @property(nonatomic, readwrite) MouseData mouseData; +- (BOOL)loadTextureFromURLs:(NSArray*)url; -- (BOOL)loadTextureFromURL:(NSURL *)url; - -- (void)setHudText:(const char *)text; +- (void)setHudText:(const char*)text; -- (void)tableViewSelectionDidChange:(NSNotification *)notification; +- (void)tableViewSelectionDidChange:(NSNotification*)notification; - (void)addNotifications; @@ -242,46 +252,48 @@ - (void)fixupDocumentList; // https://medium.com/@kevingutowski/how-to-setup-a-tableview-in-2019-obj-c-c7dece203333 @interface TableViewController : NSObject -@property (nonatomic, strong) NSMutableArray* items; +@property(nonatomic, strong) NSMutableArray* items; @end @implementation TableViewController -- (instancetype)init { +- (instancetype)init +{ self = [super init]; - + _items = [[NSMutableArray alloc] init]; + return self; } // NSTableViewDataSource -- (NSInteger)numberOfRowsInTableView:(NSTableView *)tableView +- 
(NSInteger)numberOfRowsInTableView:(NSTableView*)tableView { return self.items.count; } // NSTableViewDelegate --(NSView *)tableView:(NSTableView *)tableView viewForTableColumn:(NSTableColumn *)tableColumn row:(NSInteger)row +- (NSView*)tableView:(NSTableView*)tableView viewForTableColumn:(NSTableColumn*)tableColumn row:(NSInteger)row { NSString* identifier = tableColumn.identifier; NSTableCellView* cell = [tableView makeViewWithIdentifier:identifier owner:self]; - cell.textField.stringValue = [self.items objectAtIndex:row]; + cell.textField.attributedStringValue = [self.items objectAtIndex:row]; return cell; } // NSTableViewDelegate -- (BOOL)tableView:(NSTableView *)tableView -shouldTypeSelectForEvent:(NSEvent *)event -withCurrentSearchString:(NSString *)searchString +- (BOOL)tableView:(NSTableView*)tableView + shouldTypeSelectForEvent:(NSEvent*)event + withCurrentSearchString:(NSString*)searchString { // Return NO to prevent type select (otherwise S or N key will search that key) // This is nice on long lists though. return NO; } -- (void)tableViewSelectionDidChange:(NSNotification *)notification +- (void)tableViewSelectionDidChange:(NSNotification*)notification { - // does not need to respond, have a listener on this notification + // does not need to respond, have a listener on this notification } @end @@ -308,7 +320,7 @@ - (instancetype)init + (BOOL)autosavesInPlace { - return NO; // YES; + return NO; // YES; } // call when "new" called @@ -321,8 +333,8 @@ - (void)makeWindowControllers //addWindowController:controller]; } -- (NSData *)dataOfType:(nonnull NSString *)typeName - error:(NSError *_Nullable __autoreleasing *)outError +- (NSData*)dataOfType:(nonnull NSString*)typeName + error:(NSError* _Nullable __autoreleasing*)outError { // Insert code here to write your document to data of the specified type. 
If // outError != NULL, ensure that you create and set an appropriate error if @@ -334,25 +346,19 @@ - (NSData *)dataOfType:(nonnull NSString *)typeName return nil; } - - -- (BOOL)readFromURL:(nonnull NSURL *)url - ofType:(nonnull NSString *)typeName - error:(NSError *_Nullable __autoreleasing *)outError +- (BOOL)readFromURL:(nonnull NSURL*)url + ofType:(nonnull NSString*)typeName + error:(NSError* _Nullable __autoreleasing*)outError { // called from OpenRecent documents menu -#if 0 - //MyMTKView* view = self.windowControllers.firstObject.window.contentView; - //return [view loadTextureFromURL:url]; -#else + // throw into an array + NSArray* urls = @[ url ]; - // TODO: This is only getting called on first open on macOS 12.0 even with hack below. - // find out why. - NSApplication* app = [NSApplication sharedApplication]; MyMTKView* view = app.mainWindow.contentView; - BOOL success = [view loadTextureFromURL:url]; + + BOOL success = [view loadTextureFromURLs:urls]; if (success) { // Note: if I return NO from this call then a dialog pops up that image // couldn't be loaded, but then the readFromURL is called everytime a new @@ -363,7 +369,6 @@ - (BOOL)readFromURL:(nonnull NSURL *)url } return success; -#endif } @end @@ -382,41 +387,121 @@ @interface AppDelegate () @implementation AppDelegate -- (void)applicationDidFinishLaunching:(NSNotification *)aNotification +- (void)applicationDidFinishLaunching:(NSNotification*)aNotification { // Insert code here to initialize your application } -- (void)applicationWillTerminate:(NSNotification *)aNotification +- (void)applicationWillTerminate:(NSNotification*)aNotification { // Insert code here to tear down your application } - (BOOL)applicationShouldTerminateAfterLastWindowClosed: - (NSApplication *)sender + (NSApplication*)sender { return YES; } -- (void)application:(NSApplication *)sender - openURLs:(nonnull NSArray *)urls +- (void)application:(NSApplication*)sender + openURLs:(nonnull NSArray*)urls { // this is called from 
"Open In..." - MyMTKView *view = sender.mainWindow.contentView; + MyMTKView* view = sender.mainWindow.contentView; + [view loadTextureFromURLs:urls]; + [view fixupDocumentList]; +} - // TODO: if more than one url dropped, and they are albedo/nmap, then display - // them together with the single uv set. Need controls to show one or all - // together. +/* may need to add code for NSSavePanel for perftrace (.gz) +- (void)exportDocument:(NSString*)name toType:(NSString*)typeUTI +{ + NSWindow* window = [[[self windowControllers] objectAtIndex:0] window]; - // TODO: also do an overlapping diff if two files are dropped with same - // dimensions. + // Build a new name for the file using the current name and + // the filename extension associated with the specified UTI. + CFStringRef newExtension = UTTypeCopyPreferredTagWithClass((CFStringRef)typeUTI, + kUTTagClassFilenameExtension); + NSString* newName = [[name stringByDeletingPathExtension] + stringByAppendingPathExtension:(NSString*)newExtension]; + CFRelease(newExtension); - NSURL* url = urls.firstObject; - [view loadTextureFromURL:url]; - [view fixupDocumentList]; + // Set the default name for the file and show the panel. + NSSavePanel* panel = [NSSavePanel savePanel]; + [panel setNameFieldStringValue:newName]; + [panel beginSheetModalForWindow:window completionHandler:^(NSInteger result){ + if (result == NSFileHandlingPanelOKButton) + { + NSURL* theFile = [panel URL]; + + // Write the contents in the new format. + + } + }]; } +*/ + +// this isn't filtered by the document types specified, NSDocumentController? +// added public.folder instead, this would need to call readFromURL +- (IBAction)openDocument:(id)sender +{ + // need to implement, or default NSOpenPanel can't specify a directory + NSDocumentController* controller = [NSDocumentController sharedDocumentController]; +#if 0 + // Would be nice, but doesn't allow directory. + // How is NSDocument aware of directory, from Info.plist? 
+// NSArray* urls = [controller URLsFromRunningOpenPanel]; +// if (urls) { +// NSLog(@"selected URL: %@", urls[0]); +// +// } +#else + + NSOpenPanel* panel = [NSOpenPanel openPanel]; + [panel setCanChooseFiles:YES]; + [panel setCanChooseDirectories:YES]; + [panel setAllowsMultipleSelection:NO]; + + if ([controller runModalOpenPanel:panel forTypes:utis] == NSModalResponseOK) { + NSArray* urls = [panel URLs]; + NSURL* url = [urls objectAtIndex:0]; + + // This gets a file:// urls, and then openDocument won't open it if + // it's a folder. + + bool isDirectory = false; + if (url.isFileURL) { + BOOL isDir = NO; + // Verify that the file exists + // and is indeed a directory (isDirectory is an out parameter) + if ([[NSFileManager defaultManager] fileExistsAtPath:url.path isDirectory:&isDir] && isDir) { + isDirectory = true; + } + } + + if (isDirectory) { + // have to open this directory URL directly + //[self openURLs:[NSApplication sharedApplication] urls:urls]; + // this is called from "Open In..." + NSApplication* app = [NSApplication sharedApplication]; + MyMTKView* view = app.mainWindow.contentView; + [view loadTextureFromURLs:urls]; + [view fixupDocumentList]; + } + else { + [controller openDocumentWithContentsOfURL:url + display:YES + completionHandler: + ^(NSDocument* doc, BOOL isAlreadOpen, NSError* error) { + if (!error) { + // what should this do? 
+ } + }]; + } + } +#endif +} - (IBAction)showAboutDialog:(id)sender { @@ -428,7 +513,7 @@ - (IBAction)showAboutDialog:(id)sender // want to embed the git tag here options[@"Copyright"] = - [NSString stringWithUTF8String:"kram ©2020-2022 by Alec Miller"]; + [NSString stringWithUTF8String:"kram ©2020-2024 by Alec Miller"]; // add a link to kram website, skip the Visit text NSMutableAttributedString* str = [[NSMutableAttributedString alloc] @@ -473,74 +558,47 @@ - (IBAction)showAboutDialog:(id)sender @end -// also NSPasteboardTypeURL -// also NSPasteboardTypeTIFF -NSArray *pasteboardTypes = @[ NSPasteboardTypeFileURL ]; +NSArray* pasteboardTypes = @[ + // don't really want generic urls, but need folders to drop + //NSPasteboardTypeURL + + // this is preventing folder drops ? + NSPasteboardTypeFileURL +]; + +/* correlates with + +public.directory. +public.png, +org.khronos.ktx, +public.ktx2, +com.microsoft.dds, +public.zip-archive, +dyn.ah62d4rv4ge8043pyqf0g24pc, // ick - metallib +dyn.ah62d4rv4ge80s5dyq2, // ick - gltf +dyn.ah62d4rv4ge80s5dc // ick - glb + +*/ + +//---------------------------------------------------- @implementation MyMTKView { - NSMenu* _viewMenu; // really the items + NSMenu* _viewMenu; // really the items NSStackView* _buttonStack; - NSMutableArray* _buttonArray; + NSMutableArray* _buttonArray; NSTextField* _hudLabel; NSTextField* _hudLabel2; - + // Offer list of files in archives // TODO: move to NSOutlineView since that can show archive folders with content inside IBOutlet NSTableView* _tableView; IBOutlet TableViewController* _tableViewController; - - vector _textSlots; - ShowSettings* _showSettings; - // allow zip files to be dropped and opened, and can advance through bundle - // content - ZipHelper _zip; - MmapHelper _zipMmap; - int32_t _fileArchiveIndex; - BOOL _noImageLoaded; - - vector _folderFiles; - int32_t _fileFolderIndex; - - Action* _actionPlay; - Action* _actionShapeUVPreview; - Action* _actionHelp; - Action* _actionInfo; - Action* 
_actionHud; - Action* _actionShowAll; - - Action* _actionPreview; - Action* _actionWrap; - Action* _actionPremul; - Action* _actionSigned; - - Action* _actionDebug; - Action* _actionGrid; - Action* _actionChecker; - Action* _actionHideUI; - Action* _actionVertical; - - Action* _actionMip; - Action* _actionFace; - Action* _actionArray; - Action* _actionItem; - Action* _actionPrevItem; - Action* _actionCounterpart; - Action* _actionPrevCounterpart; - Action* _actionReload; - Action* _actionFit; - - Action* _actionShapeMesh; - Action* _actionShapeChannel; - Action* _actionLighting; - Action* _actionTangent; - - Action* _actionR; - Action* _actionG; - Action* _actionB; - Action* _actionA; - - vector _actions; + // copy of modifier flags, can tie drop actions to this + NSEventModifierFlags _modifierFlags; + + ShowSettings* _showSettings; + Data _data; } - (void)awakeFromNib @@ -552,10 +610,19 @@ - (void)awakeFromNib CGRect rect = scrollView.frame; rect.origin.y += 50; scrollView.frame = rect; - + + // C++ delegate + _data._delegate.view = (__bridge void*)self; + + // this is sandbox or root if not sandboxed + // This is objC call... + // This has to be in a .mm file to call + std::string traceDir = [NSHomeDirectory() UTF8String]; + traceDir += "/traces/"; + _data.setPerfDirectory(traceDir.c_str()); // TODO: see if can only open this - // NSLog(@"AwakeFromNIB"); + // KLOGI("Viewer", "AwakeFromNIB"); } // to get upper left origin like on UIView @@ -575,26 +642,23 @@ - (BOOL)isFlipped // TODO: Sometimes getting panels from right side popping in when trying to pan // on macOS without using pan gesture. 
-- (instancetype)initWithCoder:(NSCoder *)coder +- (instancetype)initWithCoder:(NSCoder*)coder { self = [super initWithCoder:coder]; - _showSettings = new ShowSettings; + _showSettings = _data._showSettings; - self.clearColor = MTLClearColorMake(0.0f, 0.0f, 0.0f, 0.0f); + self.clearColor = MTLClearColorMake(0.005f, 0.005f, 0.005f, 0.0f); - self.clearDepth = _showSettings->isReverseZ ? 0.0f : 1.0f; + self.clearDepth = 0.0f; // only re-render when changes are made // Note: this breaks ability to gpu capture, since display link not running. // so disable this if want to do captures. Or just move the cursor to capture. -#ifndef NDEBUG // KRAM_RELEASE self.enableSetNeedsDisplay = YES; -#endif + // openFile in appDelegate handles "Open in..." - _textSlots.resize(2); - // added for drag-drop support [self registerForDraggedTypes:pasteboardTypes]; @@ -611,29 +675,34 @@ - (instancetype)initWithCoder:(NSCoder *)coder _buttonStack = [self _addButtons]; // hide until image loaded + _showSettings->isHideUI = true; _buttonStack.hidden = YES; - _noImageLoaded = YES; _hudLabel2 = [self _addHud:YES]; _hudLabel = [self _addHud:NO]; [self setHudText:""]; - + return self; } -- (nonnull ShowSettings *)showSettings +- (nonnull ShowSettings*)showSettings { return _showSettings; } --(void)fixupDocumentList +- (nonnull kram::Data*)data +{ + return &_data; +} + +- (void)fixupDocumentList { // DONE: this recent menu only seems to work the first time // and not in subsequent calls to the same entry. readFromUrl isn't even // called. So don't get a chance to switch back to a recent texture. Maybe // there's some list of documents created and so it doesn't think the file // needs to be reloaded. 
- + // Clear the document list so readFromURL keeps getting called // Can't remove currentDoc, so have to skip that NSDocumentController* dc = [NSDocumentController sharedDocumentController]; @@ -649,109 +718,15 @@ -(void)fixupDocumentList } } -- (NSStackView *)_addButtons -{ - // Don't reorder without also matching actionPtrs below - Action actions[] = { - Action("?", "Help", Key::Slash), - Action("I", "Info", Key::I), - Action("H", "Hud", Key::H), - Action("U", "UI", Key::U), - Action("V", "UI Vertical", Key::V), - - Action("D", "Debug", Key::D), - Action("G", "Grid", Key::G), - Action("B", "Checkerboard", Key::B), - - Action("", "", Key::A), // sep - - Action("P", "Preview", Key::P), - Action("W", "Wrap", Key::W), - Action("8", "Premul", Key::Num8), - Action("7", "Signed", Key::Num7), - - Action("", "", Key::A), // sep - - Action("A", "Show All", Key::A), - Action("M", "Mip", Key::M), - Action("F", "Face", Key::F), - Action("Y", "Array", Key::Y), - - Action("↑", "Prev Item", Key::UpArrow), - Action("↓", "Next Item", Key::DownArrow), - Action("←", "Prev Counterpart", Key::LeftArrow), - Action("→", "Next Counterpart", Key::RightArrow), - - Action("R", "Reload", Key::R), - Action("0", "Fit", Key::Num0), - - Action("", "", Key::A), // sep - - Action(" ", "Play", Key::Space), // TODO: really need icon on this - Action("6", "Shape UVPreview", Key::Num6), - Action("S", "Shape", Key::S), - Action("C", "Shape Channel", Key::C), - Action("L", "Lighting", Key::L), - Action("T", "Tangents", Key::T), - - Action("", "", Key::A), // sep - - // make these individual toggles and exclusive toggle off shift - Action("1", "Red", Key::Num1), - Action("2", "Green", Key::Num2), - Action("3", "Blue", Key::Num3), - Action("4", "Alpha", Key::Num4), - }; - - // These have to be in same order as above. May want to go back to search for text above. 
- Action** actionPtrs[] = { - &_actionHelp, - &_actionInfo, - &_actionHud, - &_actionHideUI, - &_actionVertical, - - &_actionDebug, - &_actionGrid, - &_actionChecker, - - &_actionPreview, - &_actionWrap, - &_actionPremul, - &_actionSigned, - - &_actionShowAll, - &_actionMip, - &_actionFace, - &_actionArray, - - &_actionPrevItem, - &_actionItem, - &_actionPrevCounterpart, - &_actionCounterpart, - - &_actionReload, - &_actionFit, - - &_actionPlay, - &_actionShapeUVPreview, - &_actionShapeMesh, - &_actionShapeChannel, - &_actionLighting, - &_actionTangent, - - &_actionR, - &_actionG, - &_actionB, - &_actionA, - }; - +- (NSStackView*)_addButtons +{ + _data.initActions(); + NSRect rect = NSMakeRect(0, 10, 30, 30); - #define ArrayCount(x) (sizeof(x) / sizeof(x[0])) + vector& actions = _data.actions(); + int32_t numActions = actions.size(); - int32_t numActions = ArrayCount(actions); - NSMutableArray* buttons = [[NSMutableArray alloc] init]; for (int32_t i = 0; i < numActions; ++i) { @@ -776,58 +751,40 @@ - (NSStackView *)_addButtons [button setFrame:rect]; // https://stackoverflow.com/questions/4467597/how-do-you-stroke-the-outside-of-an-nsattributedstring - + NSMutableDictionary* attribsOff = [NSMutableDictionary dictionaryWithObjectsAndKeys: - //[NSFont systemFontOfSize:64.0],NSFontAttributeName, - [NSColor whiteColor],NSForegroundColorAttributeName, - [NSNumber numberWithFloat:-2.0],NSStrokeWidthAttributeName, - [NSColor blackColor],NSStrokeColorAttributeName, - nil]; + //[NSFont systemFontOfSize:64.0],NSFontAttributeName, + [NSColor whiteColor], NSForegroundColorAttributeName, + [NSNumber numberWithFloat:-2.0], NSStrokeWidthAttributeName, + [NSColor blackColor], NSStrokeColorAttributeName, + nil]; NSMutableDictionary* attribsOn = [NSMutableDictionary dictionaryWithObjectsAndKeys: - //[NSFont systemFontOfSize:64.0],NSFontAttributeName, - [NSColor systemBlueColor],NSForegroundColorAttributeName, - [NSNumber numberWithFloat:-2.0],NSStrokeWidthAttributeName, - [NSColor 
blackColor],NSStrokeColorAttributeName, - nil]; + //[NSFont systemFontOfSize:64.0],NSFontAttributeName, + [NSColor systemBlueColor], NSForegroundColorAttributeName, + [NSNumber numberWithFloat:-2.0], NSStrokeWidthAttributeName, + [NSColor blackColor], NSStrokeColorAttributeName, + nil]; button.attributedTitle = [[NSMutableAttributedString alloc] initWithString:name attributes:attribsOff]; - + // Have to set this too, or button doesn't go blue button.attributedAlternateTitle = [[NSMutableAttributedString alloc] initWithString:name attributes:attribsOn]; - -#if 0 // this isn't appearing - button.wantsLayer = YES; - if (button.layer) { -// CGFloat glowColor[] = {1.0, 0.0, 0.0, 1.0}; -// button.layer.masksToBounds = false; -// button.layer.shadowColor = CGColorCreate(CGColorSpaceCreateDeviceRGB(), glowColor); -// button.layer.shadowRadius = 10.0; -// button.layer.shadowOpacity = 1.0; -// //button.layer.shadowOffset = .zero; - - NSShadow* dropShadow = [[NSShadow alloc] init]; - [dropShadow setShadowColor:[NSColor redColor]]; - [dropShadow setShadowOffset:NSMakeSize(0, 0)]; - [dropShadow setShadowBlurRadius:10.0]; - [button setShadow: dropShadow]; - } -#endif - + // stackView seems to disperse the items evenly across the area, so this // doesn't work bool isSeparator = icon[0] == 0; - + if (isSeparator) { // rect.origin.y += 11; button.enabled = NO; } else { - action.button = button; - + action.button = (__bridge void*)button; + // rect.origin.y += 25; // TODO: add icons //button.image = ...; - + // keep all buttons, since stackView will remove and pack the stack [_buttonArray addObject:button]; } @@ -838,7 +795,7 @@ - (NSStackView *)_addButtons NSStackView* stackView = [NSStackView stackViewWithViews:buttons]; stackView.orientation = NSUserInterfaceLayoutOrientationVertical; stackView.detachesHiddenViews = - YES; // default, but why have to have _buttonArrary + YES; // default, but why have to have _buttonArrary [self addSubview:stackView]; // Want menus, so user can 
define their own shortcuts to commands @@ -855,93 +812,67 @@ - (NSStackView *)_addButtons for (int32_t i = 0; i < numActions; ++i) { Action& action = actions[i]; - const char* icon = action.icon; // single char + const char* icon = action.icon; // single char const char* title = action.tip; NSString* toolTip = [NSString stringWithUTF8String:icon]; NSString* name = [NSString stringWithUTF8String:title]; bool isSeparator = icon[0] == 0; - + if (isSeparator) { [_viewMenu addItem:[NSMenuItem separatorItem]]; } else { // NSString *shortcut = @""; // for now, or AppKit turns key int cmd+shift+key NSString* shortcut = [NSString stringWithUTF8String:icon]; - + NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction:) keyEquivalent:shortcut]; menuItem.toolTip = toolTip; - + // All key-equivalents assume cmd, so unset cmd // still leaves shift next to keys, but better than nothing menuItem.keyEquivalentModifierMask = (NSEventModifierFlags)0; - + // TODO: add icons, also onStateImage, offStageImage, mixedStateImage //menuItem.image = ...; - + // can set an integer constant that represents menu that avoid testing string (actionID) //menuItem.tag = ...; - + // TODO: menus and buttons should reflect any toggle state // menuItem.state = Mixed/Off/On; [_viewMenu addItem:menuItem]; - - action.menuItem = menuItem; + + action.menuItem = (__bridge void*)menuItem; } } [_viewMenu addItem:[NSMenuItem separatorItem]]; //---------------------- - - // copy all of them to a vector, and then assign the action ptrs - for (int32_t i = 0; i < numActions; ++i) { - Action& action = actions[i]; - const char* icon = action.icon; // single char - - // skip separators - bool isSeparator = icon[0] == 0; - if (isSeparator) continue; - - _actions.push_back(action); - } - - // now alias Actions to the vector above - //assert(_actions.size() == ArrayCount(actionPtrs)); - for (int32_t i = 0; i < _actions.size(); ++i) { - *(actionPtrs[i]) = &_actions[i]; - } - - // don't 
want these buttons showing up, menu only - _actionPrevItem->disableButton(); - _actionItem->disableButton(); - _actionPrevCounterpart->disableButton(); - _actionCounterpart->disableButton(); - - _actionHud->disableButton(); - _actionHelp->disableButton(); - _actionHideUI->disableButton(); - _actionVertical->disableButton(); - + + // don't want some buttons showing up, menu only + _data.initDisabledButtons(); + return stackView; } -- (NSTextField *)_addHud:(BOOL)isShadow +- (NSTextField*)_addHud:(BOOL)isShadow { // TODO: This text field is clamping to the height, so have it set to 1200. // really want field to expand to fill the window height for large output uint32_t w = 800; uint32_t h = 1220; - + // add a label for the hud NSTextField* label = [[MyNSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 21 : 20, isShadow ? 21 : 20, w, h)]; - + label.preferredMaxLayoutWidth = w; label.drawsBackground = NO; @@ -952,12 +883,12 @@ - (NSTextField *)_addHud:(BOOL)isShadow label.editable = NO; label.selectable = NO; label.lineBreakMode = NSLineBreakByClipping; - label.maximumNumberOfLines = 0; // fill to height + label.maximumNumberOfLines = 0; // fill to height // important or interferes with table view label.refusesFirstResponder = YES; label.enabled = NO; - + label.cell.scrollable = NO; label.cell.wraps = NO; @@ -972,90 +903,13 @@ - (NSTextField *)_addHud:(BOOL)isShadow return label; } -- (void)doZoomMath:(float)newZoom newPan:(float2 &)newPan -{ - // transform the cursor to texture coordinate, or clamped version if outside - Renderer* renderer = (Renderer *)self.delegate; - float4x4 projectionViewModelMatrix = - [renderer computeImageTransform:_showSettings->panX - panY:_showSettings->panY - zoom:_showSettings->zoom]; - - // convert from pixel to clip space - float halfX = _showSettings->viewSizeX * 0.5f; - float halfY = _showSettings->viewSizeY * 0.5f; - - // sometimes get viewSizeX that's scaled by retina, and other times not. 
- // account for contentScaleFactor (viewSizeX is 2x bigger than cursorX on - // retina display) now passing down drawableSize instead of view.bounds.size - halfX /= (float)_showSettings->viewContentScaleFactor; - halfY /= (float)_showSettings->viewContentScaleFactor; - - float4x4 viewportMatrix = - { - (float4){ halfX, 0, 0, 0 }, - (float4){ 0, -halfY, 0, 0 }, - (float4){ 0, 0, 1, 0 }, - (float4){ halfX, halfY, 0, 1 }, - }; - viewportMatrix = inverse(viewportMatrix); - - float4 cursor = float4m(_showSettings->cursorX, _showSettings->cursorY, 0.0f, 1.0f); - - cursor = viewportMatrix * cursor; - - //NSPoint clipPoint; - //clipPoint.x = (point.x - halfX) / halfX; - //clipPoint.y = -(point.y - halfY) / halfY; - - // convert point in window to point in model space - float4x4 mInv = inverse(projectionViewModelMatrix); - - float4 pixel = mInv * float4m(cursor.x, cursor.y, 1.0f, 1.0f); - pixel.xyz /= pixel.w; // in case perspective used - - // allow pan to extend to show all - float ar = _showSettings->imageAspectRatio(); - float maxX = 0.5f * ar; - float minY = -0.5f; - if (_showSettings->isShowingAllLevelsAndMips) { - maxX += ar * 1.0f * (_showSettings->totalChunks() - 1); - minY -= 1.0f * (_showSettings->mipCount - 1); - } - - // X bound may need adjusted for ar ? 
- // that's in model space (+/0.5f, +/0.5f), so convert to texture space - pixel.x = NAMESPACE_STL::clamp(pixel.x, -0.5f * ar, maxX); - pixel.y = NAMESPACE_STL::clamp(pixel.y, minY, 0.5f); - - // now that's the point that we want to zoom towards - // No checks on this zoom - // old - newPosition from the zoom - - // normalized coords to pixel coords - pixel.x *= _showSettings->imageBoundsX; - pixel.y *= _showSettings->imageBoundsY; - - // this fixes pinch-zoom on cube which are 6:1 - pixel.x /= ar; - -#if USE_PERSPECTIVE - // TODO: this doesn't work for perspective - newPan.x = _showSettings->panX - (_showSettings->zoom - newZoom) * pixel.x; - newPan.y = _showSettings->panY + (_showSettings->zoom - newZoom) * pixel.y; -#else - newPan.x = _showSettings->panX - (_showSettings->zoom - newZoom) * pixel.x; - newPan.y = _showSettings->panY + (_showSettings->zoom - newZoom) * pixel.y; -#endif -} - -- (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer +- (void)handleGesture:(NSGestureRecognizer*)gestureRecognizer { // skip until image loaded if (_showSettings->imageBoundsX == 0) { return; } - + // https://cocoaosxrevisited.wordpress.com/2018/01/06/chapter-18-mouse-events/ if (gestureRecognizer != _zoomGesture) { return; @@ -1066,10 +920,10 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer float zoom = _zoomGesture.magnification; if (isFirstGesture) { _zoomGesture.magnification = 1.0f; - + _validMagnification = 1.0f; _originalZoom = _showSettings->zoom; - + zoom = _originalZoom; } else if (zoom * _originalZoom < 0.1f) { @@ -1077,7 +931,7 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer zoom = 0.1f / _originalZoom; _zoomGesture.magnification = zoom; } - + if (!isFirstGesture) { // try expontental (this causes a jump, comparison avoids an initial jump // zoom = powf(zoom, 1.05f); @@ -1085,30 +939,28 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer // doing multiply instead of equals here, also does exponential zom 
zoom *= _originalZoom; } - + [self updateZoom:zoom]; } --(void)updateZoom:(float)zoom +- (void)updateZoom:(float)zoom { // https://developer.apple.com/documentation/uikit/touches_presses_and_gestures/handling_uikit_gestures/handling_pinch_gestures?language=objc // need to sync up the zoom when action begins or zoom will jump - // https://stackoverflow.com/questions/30002361/image-zoom-centered-on-mouse-position // DONE: rect is now ar:1 for rect case, so these x values need to be half ar // and that's only if it's not rotated. box/cube/ellipse make also not correspond float ar = _showSettings->imageAspectRatio(); - + // find the cursor location with respect to the image - float4 bottomLeftCorner = float4m(-0.5 * ar, -0.5f, 0.0f, 1.0f); - float4 topRightCorner = float4m(0.5 * ar, 0.5f, 0.0f, 1.0f); + float4 bottomLeftCorner = float4m(-0.5f * ar, -0.5f, 0.0f, 1.0f); + float4 topRightCorner = float4m(0.5f * ar, 0.5f, 0.0f, 1.0f); - Renderer* renderer = (Renderer *)self.delegate; - float4x4 newMatrix = [renderer computeImageTransform:_showSettings->panX - panY:_showSettings->panY - zoom:zoom]; + float4x4 newMatrix = _data.computeImageTransform(_showSettings->panX, + _showSettings->panY, + zoom); // don't allow panning the entire image off the view boundary // transform the upper left and bottom right corner of the image @@ -1121,25 +973,25 @@ -(void)updateZoom:(float)zoom // see that rectangle intersects the view, view is -1 to 1 // this handles inversion - float2 ptOrigin = simd::min(pt0.xy, pt1.xy); + float2 ptOrigin = SIMD_NAMESPACE::min(pt0.xy, pt1.xy); float2 ptSize = abs(pt0.xy - pt1.xy); - CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); - CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); + float4 imageRect = float4m(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); + float4 viewRect = float4m(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); int32_t numTexturesY = _showSettings->mipCount; if 
(_showSettings->isShowingAllLevelsAndMips) { - imageRect.origin.y -= (numTexturesY - 1) * imageRect.size.height; + imageRect.y -= (numTexturesY - 1) * imageRect.w; - imageRect.size.width *= numTexturesX; - imageRect.size.height *= numTexturesY; + imageRect.z *= numTexturesX; // w + imageRect.w *= numTexturesY; // h } - float visibleWidth = imageRect.size.width * _showSettings->viewSizeX / + float visibleWidth = imageRect.z * _showSettings->viewSizeX / _showSettings->viewContentScaleFactor; - float visibleHeight = imageRect.size.height * _showSettings->viewSizeY / + float visibleHeight = imageRect.w * _showSettings->viewSizeY / _showSettings->viewContentScaleFactor; // don't allow image to get too big @@ -1149,23 +1001,23 @@ -(void)updateZoom:(float)zoom //float minZoom = std::min(1.0f/8.0f, _showSettings->zoomFit); // TODO: 3d models have imageBoundsY of 1, so the limits are hit immediately - + int32_t gap = _showSettings->showAllPixelGap; - + // Note this includes chunks and mips even if those are not shown // so image could be not visible. 
float2 maxZoomXY; maxZoomXY.x = maxZoom * (_showSettings->imageBoundsX + gap) * numTexturesX; maxZoomXY.y = maxZoom * (_showSettings->imageBoundsY + gap) * numTexturesY; - + float minPixelSize = 4; float2 minZoomXY; minZoomXY.x = minPixelSize; // minZoom * (_showSettings->imageBoundsX + gap) * numTexturesX; minZoomXY.y = minPixelSize; // minZoom * (_showSettings->imageBoundsY + gap) * numTexturesY; - + // don't allow image to get too big bool isZoomChanged = true; - + if (visibleWidth > maxZoomXY.x || visibleHeight > maxZoomXY.y) { isZoomChanged = false; } @@ -1176,12 +1028,12 @@ -(void)updateZoom:(float)zoom } // or completely off-screen - if (!NSIntersectsRect(imageRect, viewRect)) { + if (!rectIntersectsRect(imageRect, viewRect)) { isZoomChanged = false; } - + if (!isZoomChanged) { - _zoomGesture.magnification = _validMagnification; + _zoomGesture.magnification = _validMagnification; // objC return; } @@ -1190,55 +1042,56 @@ -(void)updateZoom:(float)zoom // feels wrong. now adjust the pan so that cursor text stays locked under // (zoom to cursor) float2 newPan; - [self doZoomMath:zoom newPan:newPan]; + _data.doZoomMath(zoom, newPan); // store this - _validMagnification = _zoomGesture.magnification; + _validMagnification = _zoomGesture.magnification; // objC _showSettings->zoom = zoom; _showSettings->panX = newPan.x; _showSettings->panY = newPan.y; - if (doPrintPanZoom) { - string text; - sprintf(text, - "Pan %.3f,%.3f\n" - "Zoom %.2fx\n", - _showSettings->panX, _showSettings->panY, _showSettings->zoom); - [self setHudText:text.c_str()]; - } + // if (doPrintPanZoom) { + // string text; + // sprintf(text, + // "Pan %.3f,%.3f\n" + // "Zoom %.2fx\n", + // _showSettings->panX, _showSettings->panY, _showSettings->zoom); + // [self setHudText:text.c_str()]; + // } - [self updateEyedropper]; - self.needsDisplay = YES; + // Cause a new sample for eyedropper + _data.updateEyedropper(); + + self.needsDisplay = YES; // objC } } - // left mouse button down -- 
(void)mouseDown:(NSEvent *)event +- (void)mouseDown:(NSEvent*)event { // skip until image loaded if (_showSettings->imageBoundsX == 0) { return; } - + _mouseData.originPoint = - _mouseData.oldPoint = - _mouseData.newPoint = [self convertPoint:[event locationInWindow] fromView:nil]; + _mouseData.oldPoint = + _mouseData.newPoint = [self convertPoint:[event locationInWindow] fromView:nil]; // capture pan value and cursor value _mouseData.pan = NSMakePoint(_showSettings->panX, _showSettings->panY); } // drag is mouse movement with left button down -- (void)mouseDragged:(NSEvent *)event +- (void)mouseDragged:(NSEvent*)event { // skip until image loaded if (_showSettings->imageBoundsX == 0) { return; } - + _mouseData.oldPoint = _mouseData.newPoint; _mouseData.newPoint = [self convertPoint:[event locationInWindow] fromView:nil]; @@ -1248,574 +1101,234 @@ - (void)mouseDragged:(NSEvent *)event delta.y = _mouseData.newPoint.y - _mouseData.originPoint.y; delta.x = -delta.x; delta.y = -delta.y; - + // scale to actual px or mouse cursor doesn't track drag delta.x *= _showSettings->viewContentScaleFactor; delta.y *= _showSettings->viewContentScaleFactor; - + // This is correct, but scale to image so cursor tracks the pick location // might be over a different mip/chunk though. 
float panX = _mouseData.pan.x + delta.x; float panY = _mouseData.pan.y + delta.y; - + [self updatePan:panX panY:panY]; } -- (void)mouseUp:(NSEvent *)event +- (void)mouseUp:(NSEvent*)event { // ignore up even though cursor may have moved - } -- (void)mouseMoved:(NSEvent *)event +- (void)mouseMoved:(NSEvent*)event { // skip until image loaded if (_showSettings->imageBoundsX == 0) { return; } - - // pixel in non-square window coords, run thorugh inverse to get texel space + + // pixel in non-square window coords, run through inverse to get texel space // I think magnofication of zoom gesture is affecting coordinates reported by // this - NSPoint point = [self convertPoint:[event locationInWindow] fromView:nil]; + NSPoint point = [event locationInWindow]; + + // This flips so upper left corner is 0,0, vs. bottom left + point = [self convertPoint:point fromView:nil]; // this needs to change if view is resized, but will likely receive mouseMoved // events _showSettings->cursorX = (int32_t)point.x; _showSettings->cursorY = (int32_t)point.y; - // should really do this in draw call, since moved messeage come in so quickly - [self updateEyedropper]; + _data.updateEyedropper(); + + // Cause a new sample for eyedropper (will run in Metal CompletedHandler) + self.needsDisplay = YES; } -inline float4 toPremul(const float4 &c) +- (void)updateEyedropperText { - // premul with a - float4 cpremul = c; - float a = c.a; - cpremul.w = 1.0f; - cpremul *= a; - return cpremul; -} + if (_showSettings->imageBoundsX == 0) return; -// Writing out to rgba32 for sampling, but unorm formats like ASTC and RGBA8 -// are still off and need to use the following. 
-float toSnorm8(float c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } -float2 toSnorm8(float2 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } -float3 toSnorm8(float3 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } -float4 toSnorm8(float4 c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); } + float2 uv; + uv.x = _showSettings->textureLookupX / (float)_showSettings->imageBoundsX; + uv.y = _showSettings->textureLookupY / (float)_showSettings->imageBoundsY; -float4 toSnorm(float4 c) { return 2.0f * c - 1.0f; } + // convert data to text + _data.showEyedropperData(uv); -- (void)updateEyedropper -{ - if ((!_showSettings->isHudShown)) { - return; - } + const Atlas* atlas = _data.findAtlasAtUV(uv); + if (atlas) { + // convert back to pixels in the current mip + float mipBoundsX = std::max(1, _showSettings->imageBoundsX >> _showSettings->mipNumber); + float mipBoundsY = std::max(1, _showSettings->imageBoundsY >> _showSettings->mipNumber); - if (_showSettings->imageBoundsX == 0) { - // TODO: this return will leave old hud text up - return; - } + float4 rect = atlas->rect(); + rect.xz *= mipBoundsX; + rect.yw *= mipBoundsY; - // don't wait on renderer to update this matrix - Renderer* renderer = (Renderer *)self.delegate; + string atlasText; + sprintf(atlasText, "%d,%d %dx%d %s", + (int32_t)rect.x, (int32_t)rect.y, + (int32_t)rect.z, (int32_t)rect.w, + atlas->name.c_str()); + _data.setAtlasText(atlasText.c_str()); + } + else { + _data.setAtlasText(""); + } + // This calls setNeedsDisplay on the hud section that displays the eyeDropper + [self updateHudText]; +} - if (_showSettings->isEyedropperFromDrawable()) { - // this only needs the cursor location, but can't supply uv to - // displayPixelData +- (void)setEyedropperText:(const char*)text +{ + _data.setEyedropperText(text); + [self updateHudText]; +} - if (_showSettings->lastCursorX != _showSettings->cursorX || - _showSettings->lastCursorY != _showSettings->cursorY) { - // TODO: this means 
pan/zoom doesn't update data, may want to track some - // absolute location in virtal canvas. +- (void)setHudText:(const char*)text +{ + _data.setTextSlot(kTextSlotHud, text); + [self updateHudText]; +} - _showSettings->lastCursorX = _showSettings->cursorX; - _showSettings->lastCursorY = _showSettings->cursorY; +- (void)updateHudText +{ + // combine textSlots + string text = _data.textFromSlots(_tableView.hidden); - // This just samples from drawable, so no re-render is needed - [self showEyedropperData:float2m(0, 0)]; + NSString* textNS = [NSString stringWithUTF8String:text.c_str()]; - // TODO: remove this, but only way to get drawSamples to execute right - // now, but then entire texture re-renders and that's not power efficient. - // Really just want to sample from the already rendered texture since - // content isn't animated. + // This is drop shadowed by drawing same text twice + _hudLabel2.stringValue = textNS; + _hudLabel2.needsDisplay = YES; - self.needsDisplay = YES; - } + _hudLabel.stringValue = textNS; + _hudLabel.needsDisplay = YES; +} +- (void)scrollWheel:(NSEvent*)event +{ + // skip until image loaded + if (_showSettings->imageBoundsX == 0) { return; } - float4x4 projectionViewModelMatrix = - [renderer computeImageTransform:_showSettings->panX - panY:_showSettings->panY - zoom:_showSettings->zoom]; - - // convert to clip space, or else need to apply additional viewport transform - float halfX = _showSettings->viewSizeX * 0.5f; - float halfY = _showSettings->viewSizeY * 0.5f; - - // sometimes get viewSizeX that's scaled by retina, and other times not. 
- // account for contentScaleFactor (viewSizeX is 2x bigger than cursorX on - // retina display) now passing down drawableSize instead of view.bounds.size - halfX /= (float)_showSettings->viewContentScaleFactor; - halfY /= (float)_showSettings->viewContentScaleFactor; - - float4 cursor = float4m(_showSettings->cursorX, _showSettings->cursorY, 0.0f, 1.0f); - - float4x4 pixelToClipTfm = - { - (float4){ halfX, 0, 0, 0 }, - (float4){ 0, -halfY, 0, 0 }, - (float4){ 0, 0, 1, 0 }, - (float4){ halfX, halfY, 0, 1 }, - }; - pixelToClipTfm = inverse(pixelToClipTfm); - - cursor = pixelToClipTfm * cursor; - - //float4 clipPoint; - //clipPoint.x = (point.x - halfX) / halfX; - //clipPoint.y = -(point.y - halfY) / halfY; - - // convert point in window to point in texture - float4x4 mInv = inverse(projectionViewModelMatrix); - - float4 pixel = mInv * float4m(cursor.x, cursor.y, 1.0f, 1.0f); - pixel.xyz /= pixel.w; // in case perspective used - - float ar = _showSettings->imageAspectRatio(); - - // that's in model space (+/0.5f * ar, +/0.5f), so convert to texture space - pixel.x = 0.999f * (pixel.x / ar + 0.5f); - pixel.y = 0.999f * (-pixel.y + 0.5f); - - float2 uv = pixel.xy; - - // pixels are 0 based - pixel.x *= _showSettings->imageBoundsX; - pixel.y *= _showSettings->imageBoundsY; - - // TODO: finish this logic, need to account for gaps too, and then isolate to - // a given level and mip to sample - // if (_showSettings->isShowingAllLevelsAndMips) { - // pixel.x *= _showSettings->totalChunks(); - // pixel.y *= _showSettings->mipCount; - // } - - // TODO: clearing out the last px visited makes it hard to gather data - // put value on clipboard, or allow click to lock the displayed pixel and - // value. Might just change header to px(last): ... 
- string text; - - // only display pixel if over image - if (pixel.x < 0.0f || pixel.x >= (float)_showSettings->imageBoundsX) { - sprintf(text, "canvas: %d %d\n", (int32_t)pixel.x, (int32_t)pixel.y); - [self setEyedropperText:text.c_str()]; // ick - return; - } - if (pixel.y < 0.0f || pixel.y >= (float)_showSettings->imageBoundsY) { - // was blanking out, but was going blank on color_grid-a when over zoomed in - // image maybe not enough precision with float. - sprintf(text, "canvas: %d %d\n", (int32_t)pixel.x, (int32_t)pixel.y); - [self setEyedropperText:text.c_str()]; + // From ImGui notes: + // From macOS 12.1, scrolling with two fingers and then decelerating + // by tapping two fingers results in two events appearing. + if (event.phase == NSEventPhaseCancelled) return; - } - // Note: fromView: nil returns isFlipped coordinate, fromView:self flips it - // back. + double wheelX = [event scrollingDeltaX]; + double wheelY = [event scrollingDeltaY]; - int32_t newX = (int32_t)pixel.x; - int32_t newY = (int32_t)pixel.y; + // Ugh, how we we tell mouseWheel from trackpad gesture calling this? + // if([event phase]) - supposedly only set on trackpad, but Apple MagicMouse does this on wheel + // and trackpad fires on that too causing the image to zoom away to nothing (inertia maybe) + // https://stackoverflow.com/questions/6642058/mac-cocoa-how-can-i-detect-trackpad-scroll-gestures + bool isMouse = ![event hasPreciseScrollingDeltas]; - if (_showSettings->textureLookupX != newX || - _showSettings->textureLookupY != newY) { - // Note: this only samples from the original texture via compute shaders - // so preview mode pixel colors are not conveyed. But can see underlying - // data driving preview. 
+ if (isMouse) { + // zoom with mouse + float zoom = _zoomGesture.magnification; + if (wheelY != 0.0) { + wheelY *= 0.01; + wheelY = std::clamp(wheelY, -0.1, 0.1); - // %.0f rounds the value, but want truncation - _showSettings->textureLookupX = newX; - _showSettings->textureLookupY = newY; + zoom *= 1.0 + wheelY; - [self showEyedropperData:uv]; + // here have to modify the magnfication, since gesture isn't driving it + _zoomGesture.magnification = zoom; - // TODO: remove this, but only way to get drawSamples to execute right now, - // but then entire texture re-renders and that's not power efficient. - self.needsDisplay = YES; + [self updateZoom:zoom]; + } } -} + else { + // pan with trackpad + wheelY = -wheelY; + wheelX = -wheelX; -- (void)showEyedropperData:(float2)uv -{ - string text; - string tmp; + float panX = _showSettings->panX + wheelX; + float panY = _showSettings->panY + wheelY; - float4 c = _showSettings->textureResult; + [self updatePan:panX panY:(float)panY]; + } +} - // DONE: use these to format the text - MyMTLPixelFormat format = _showSettings->originalFormat; - bool isSrgb = isSrgbFormat(format); - bool isSigned = isSignedFormat(format); +bool rectIntersectsRect(float4 lhs, float4 rhs) +{ + // convert rect from (origin, size) to (min, max) + float4 lRect = lhs.xyxy; + lRect.zw += lhs.zw; - bool isHdr = isHdrFormat(format); - bool isFloat = isHdr; + float4 rRect = rhs.xyxy; + rRect.zw += rhs.zw; - int32_t numChannels = _showSettings->numChannels; + return all(lRect.xy <= rRect.zw) && // min <= max + all(lRect.zw >= rRect.xy); // max >= min +} - bool isNormal = _showSettings->texContentType == TexContentTypeNormal; - bool isColor = !isNormal; +// TODO: move to data, but eliminate CGRect usage +- (void)updatePan:(float)panX panY:(float)panY +{ + //Renderer* renderer = (Renderer *)self.delegate; + float4x4 projectionViewModelMatrix = + _data.computeImageTransform(panX, + panY, + _showSettings->zoom); - bool isDirection = false; - bool isValue = 
false; + // don't allow panning the entire image off the view boundary + // transform the upper left and bottom right corner or the image - if (_showSettings->isEyedropperFromDrawable()) { - // TODO: could write barycentric, then lookup uv from that - // then could show the block info. + // what if zoom moves it outside? + float ar = _showSettings->imageAspectRatio(); - // interpret based on shapeChannel, debugMode, etc - switch (_showSettings->shapeChannel) { - case ShapeChannelDepth: - isSigned = false; // using fract on uv + float4 pt0 = projectionViewModelMatrix * float4m(-0.5f * ar, -0.5f, 0.0f, 1.0f); + float4 pt1 = projectionViewModelMatrix * float4m(0.5f * ar, 0.5f, 0.0f, 1.0f); - isValue = true; - isFloat = true; - numChannels = 1; - break; - case ShapeChannelUV0: - isSigned = false; // using fract on uv + // for perspective + pt0.xyz /= pt0.w; + pt1.xyz /= pt1.w; - isValue = true; - isFloat = true; - numChannels = 2; // TODO: fix for 3d uvw - break; + float2 ptOrigin = SIMD_NAMESPACE::min(pt0.xy, pt1.xy); + float2 ptSize = abs(pt0.xy - pt1.xy); - case ShapeChannelFaceNormal: - case ShapeChannelNormal: - case ShapeChannelTangent: - case ShapeChannelBitangent: - isDirection = true; - numChannels = 3; + // see that rectangle intersects the view, view is -1 to 1 + float4 imageRect = float4m(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); + float4 viewRect = float4m(-1.0f, -1.0f, 2.0f, 2.0f); - // convert unorm to snnorm - c = toSnorm(c); - break; + int32_t numTexturesX = _showSettings->totalChunks(); + int32_t numTexturesY = _showSettings->mipCount; - case ShapeChannelMipLevel: - isValue = true; - isSigned = false; - isFloat = true; + if (_showSettings->isShowingAllLevelsAndMips) { + imageRect.y -= (numTexturesY - 1) * imageRect.w; - // viz is mipNumber as alpha - numChannels = 1; - c.r = 4.0 - (c.a * 4.0); - break; + imageRect.z *= numTexturesX; // w + imageRect.w *= numTexturesY; // h + } - default: - break; - } + if (!rectIntersectsRect(imageRect, viewRect)) 
{ + return; + } - // debug mode + if (_showSettings->panX != panX || _showSettings->panY != panY) { + _showSettings->panX = panX; + _showSettings->panY = panY; - // preview vs. not - } - else { - // this will be out of sync with gpu eval, so may want to only display px - // from returned lookup this will always be a linear color - - int32_t x = _showSettings->textureResultX; - int32_t y = _showSettings->textureResultY; - - // show uv, so can relate to gpu coordinates stored in geometry and find - // atlas areas - append_sprintf(text, "uv:%0.3f %0.3f\n", - (float)x / _showSettings->imageBoundsX, - (float)y / _showSettings->imageBoundsY); - - // pixel at top-level mip - append_sprintf(text, "px:%d %d\n", x, y); - - // show block num - int mipLOD = _showSettings->mipNumber; - - int mipX = _showSettings->imageBoundsX; - int mipY = _showSettings->imageBoundsY; - - mipX = mipX >> mipLOD; - mipY = mipY >> mipLOD; - - mipX = std::max(1, mipX); - mipY = std::max(1, mipY); - - mipX = (int32_t)(uv.x * mipX); - mipY = (int32_t)(uv.y * mipY); - - _showSettings->textureLookupMipX = mipX; - _showSettings->textureLookupMipY = mipY; - - // TODO: may want to return mip in pixel readback - // don't have it right now, so don't display if preview is enabled - if (_showSettings->isPreview) - mipLOD = 0; - - auto blockDims = blockDimsOfFormat(format); - if (blockDims.x > 1) - append_sprintf(text, "bpx: %d %d\n", mipX / blockDims.x, - mipY / blockDims.y); - - // TODO: on astc if we have original blocks can run analysis from - // astc-encoder about each block. - - // show the mip pixel (only if not preview and mip changed) - if (mipLOD > 0 && !_showSettings->isPreview) - append_sprintf(text, "mpx: %d %d\n", mipX, mipY); - - // TODO: more criteria here, can have 2 channel PBR metal-roughness - // also have 4 channel normals where zw store other data. 
- - bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); - if (isSigned && !isDecodeSigned) { - c = toSnorm8(c); - } - } - - if (isValue) { - printChannels(tmp, "val: ", c, numChannels, isFloat, isSigned); - text += tmp; - } - else if (isDirection) { - // print direction - isFloat = true; - isSigned = true; - - printChannels(tmp, "dir: ", c, numChannels, isFloat, isSigned); - text += tmp; - } - else if (isNormal) { - float nx = c.x; - float ny = c.y; - - // unorm -> snorm - if (!isSigned) { - nx = toSnorm8(nx); - ny = toSnorm8(ny); - } - - // Note: not clamping nx,ny to < 1 like in shader - - // this is always postive on tan-space normals - // assuming we're not viewing world normals - const float maxLen2 = 0.999 * 0.999; - float len2 = nx * nx + ny * ny; - if (len2 > maxLen2) - len2 = maxLen2; - - float nz = sqrt(1.0f - len2); - - // print the underlying color (some nmaps are xy in 4 channels) - printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); - text += tmp; - - // print direction - float4 d = float4m(nx, ny, nz, 0.0f); - isFloat = true; - isSigned = true; - printChannels(tmp, "dir: ", d, 3, isFloat, isSigned); - text += tmp; - } - else if (isColor) { - // DONE: write some print helpers based on float4 and length - printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); - text += tmp; - - if (isSrgb) { - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); - - printChannels(tmp, "srg: ", s, numChannels, isFloat, isSigned); - text += tmp; - } - - // display the premul values too, but not fully transparent pixels - if (c.a > 0.0 && c.a < 1.0f) { - printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); - text += tmp; - - // TODO: do we need the premul srgb color too? 
- if (isSrgb) { - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); - - printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, - isSigned); - text += tmp; - } - } - } - - [self setEyedropperText:text.c_str()]; - - // TODO: range display of pixels is useful, only showing pixels that fall - // within a given range, but would need slider then, and determine range of - // pixels. - // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). - - // TOOD: display histogram from compute, bin into buffer counts of pixels - - // DONE: stop clobbering hud text, need another set of labels - // and a zoom preview of the pixels under the cursor. - // Otherwise, can't really see the underlying color. - - // TODO: Stuff these on clipboard with a click, or use cmd+C? -} - -enum TextSlot -{ - kTextSlotHud, - kTextSlotEyedropper -}; - -- (void)setEyedropperText:(const char *)text -{ - _textSlots[kTextSlotEyedropper] = text; - - [self updateHudText]; -} - -- (void)setHudText:(const char *)text -{ - _textSlots[kTextSlotHud] = text; - - [self updateHudText]; -} - -- (void)updateHudText -{ - // combine textSlots - string text = _textSlots[kTextSlotHud]; - if (!text.empty() && text.back() != '\n') - text += "\n"; - - // don't show eyedropper text with table up, it's many lines and overlaps - if (!_tableView.hidden) - text += _textSlots[kTextSlotEyedropper]; - - NSString *textNS = [NSString stringWithUTF8String:text.c_str()]; - _hudLabel2.stringValue = textNS; - _hudLabel2.needsDisplay = YES; - - _hudLabel.stringValue = textNS; - _hudLabel.needsDisplay = YES; -} - -- (void)scrollWheel:(NSEvent *)event -{ - // skip until image loaded - if (_showSettings->imageBoundsX == 0) { - return; - } - - // From ImGui notes: - // From macOS 12.1, scrolling with two fingers and then decelerating - // by tapping two fingers results in two events appearing. 
- if (event.phase == NSEventPhaseCancelled) - return; - - double wheelX = [event scrollingDeltaX]; - double wheelY = [event scrollingDeltaY]; - - // Ugh, how we we tell mouseWheel from trackpad gesture calling this? - // if([event phase]) - supposedly only set on trackpad, but Apple MagicMouse does this on wheel - // and trackpad fires on that too causing the image to zoom away to nothing (inertia maybe) - // https://stackoverflow.com/questions/6642058/mac-cocoa-how-can-i-detect-trackpad-scroll-gestures - bool isMouse = ![event hasPreciseScrollingDeltas]; - - if (isMouse) { - // zoom with mouse - float zoom = _zoomGesture.magnification; - if (wheelY != 0.0) { - wheelY *= 0.01; - wheelY = clamp(wheelY, -0.1, 0.1); - - zoom *= 1.0 + wheelY; - - // here have to modify the magnfication, since gesture isn't driving it - _zoomGesture.magnification = zoom; - - [self updateZoom: zoom]; - } - } - else { - // pan with trackpad - wheelY = -wheelY; - wheelX = -wheelX; - - float panX = _showSettings->panX + wheelX; - float panY = _showSettings->panY + wheelY; - - [self updatePan:panX panY:(float)panY]; - } -} - -- (void)updatePan:(float)panX panY:(float)panY -{ - Renderer* renderer = (Renderer *)self.delegate; - float4x4 projectionViewModelMatrix = - [renderer computeImageTransform:panX - panY:panY - zoom:_showSettings->zoom]; - - // don't allow panning the entire image off the view boundary - // transform the upper left and bottom right corner or the image - - // what if zoom moves it outside? 
- float ar = _showSettings->imageAspectRatio(); - - float4 pt0 = projectionViewModelMatrix * float4m(-0.5 * ar, -0.5f, 0.0f, 1.0f); - float4 pt1 = projectionViewModelMatrix * float4m(0.5 * ar, 0.5f, 0.0f, 1.0f); - - // for perspective - pt0.xyz /= pt0.w; - pt1.xyz /= pt1.w; - - float2 ptOrigin = simd::min(pt0.xy, pt1.xy); - float2 ptSize = abs(pt0.xy - pt1.xy); - - // see that rectangle intersects the view, view is -1 to 1 - CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); - CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); - - int32_t numTexturesX = _showSettings->totalChunks(); - int32_t numTexturesY = _showSettings->mipCount; - - if (_showSettings->isShowingAllLevelsAndMips) { - imageRect.origin.y -= (numTexturesY - 1) * imageRect.size.height; - - imageRect.size.width *= numTexturesX; - imageRect.size.height *= numTexturesY; - } - - if (!NSIntersectsRect(imageRect, viewRect)) { - return; - } - - if (_showSettings->panX != panX || _showSettings->panY != panY) { - _showSettings->panX = panX; - _showSettings->panY = panY; - - if (doPrintPanZoom) { - string text; - sprintf(text, - "Pan %.3f,%.3f\n" - "Zoom %.2fx\n", - _showSettings->panX, _showSettings->panY, _showSettings->zoom); - [self setHudText:text.c_str()]; - } - - [self updateEyedropper]; - self.needsDisplay = YES; + // if (doPrintPanZoom) { + // string text; + // sprintf(text, + // "Pan %.3f,%.3f\n" + // "Zoom %.2fx\n", + // _showSettings->panX, _showSettings->panY, _showSettings->zoom); + // [self setHudText:text.c_str()]; + // } + + // Cause a new sample from Metal to eyeDropper + _data.updateEyedropper(); + self.needsDisplay = YES; } } @@ -1832,188 +1345,34 @@ - (BOOL)validateUserInterfaceItem:(id)item return YES; } -- (void)updateUIAfterLoad -{ - // TODO: move these to actions, and test their state instead of looking up - // buttons here and in HandleKey. 
- - // base on showSettings, hide some fo the buttons - bool isShowAllHidden = - _showSettings->totalChunks() <= 1 && _showSettings->mipCount <= 1; - - bool isArrayHidden = _showSettings->arrayCount <= 1; - bool isFaceSliceHidden = - _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; - bool isMipHidden = _showSettings->mipCount <= 1; - - bool isJumpToNextHidden = - !(_showSettings->isArchive || _showSettings->isFolder); - - bool isRedHidden = _showSettings->numChannels == 0; // models don't show rgba - bool isGreenHidden = _showSettings->numChannels <= 1; - bool isBlueHidden = _showSettings->numChannels <= 2 && - _showSettings->texContentType != TexContentTypeNormal; // reconstruct z = b on normals - - // TODO: also need a hasAlpha for pixels, since many compressed formats like - // ASTC always have 4 channels but internally store R,RG01,... etc. Can get - // more data from swizzle in the props. Often alpha doesn't store anything - // useful to view. - - // DONE: may want to disable isPremul on block textures that already have - // premul in data or else premul is applied a second time to the visual - - bool hasAlpha = _showSettings->numChannels >= 3; - - bool isAlphaHidden = !hasAlpha; - bool isPremulHidden = !hasAlpha; - bool isCheckerboardHidden = !hasAlpha; - - bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); - bool isPlayHidden = !_showSettings->isModel; - - _actionPlay->setHidden(isPlayHidden); - _actionArray->setHidden(isArrayHidden); - _actionFace->setHidden(isFaceSliceHidden); - _actionMip->setHidden(isMipHidden); - _actionShowAll->setHidden(isShowAllHidden); - - _actionItem->setHidden(isJumpToNextHidden); - _actionPrevItem->setHidden(isJumpToNextHidden); - - _actionCounterpart->setHidden(isJumpToNextHidden); - _actionPrevCounterpart->setHidden(isJumpToNextHidden); - - _actionR->setHidden(isRedHidden); - _actionG->setHidden(isGreenHidden); - _actionB->setHidden(isBlueHidden); - _actionA->setHidden(isAlphaHidden); - - 
_actionPremul->setHidden(isPremulHidden); - _actionSigned->setHidden(isSignedHidden); - _actionChecker->setHidden(isCheckerboardHidden); - - // also need to call after each toggle - [self updateUIControlState]; -} - -- (void)updateUIControlState -{ - // there is also mixed state, but not using that - auto On = true; - auto Off = false; - -#define toState(x) (x) ? On : Off - - Renderer* renderer = (Renderer*)self.delegate; - auto showAllState = toState(_showSettings->isShowingAllLevelsAndMips); - auto premulState = toState(_showSettings->isPremul); - auto signedState = toState(_showSettings->isSigned); - auto checkerboardState = toState(_showSettings->isCheckerboardShown); - auto previewState = toState(_showSettings->isPreview); - auto gridState = toState(_showSettings->isAnyGridShown()); - auto wrapState = toState(_showSettings->isWrap); - auto debugState = toState(_showSettings->debugMode != DebugModeNone); - auto playState = toState(_showSettings->isModel && renderer.playAnimations); - auto hudState = toState(_showSettings->isHudShown); - - TextureChannels &channels = _showSettings->channels; - - auto redState = toState(channels == TextureChannels::ModeR001); - auto greenState = toState(channels == TextureChannels::Mode0G01); - auto blueState = toState(channels == TextureChannels::Mode00B1); - auto alphaState = toState(channels == TextureChannels::ModeAAA1); - - auto arrayState = toState(_showSettings->arrayNumber > 0); - auto faceState = toState(_showSettings->faceNumber > 0); - auto mipState = toState(_showSettings->mipNumber > 0); - - auto meshState = toState(_showSettings->meshNumber > 0); - auto meshChannelState = toState(_showSettings->shapeChannel > 0); - auto lightingState = - toState(_showSettings->lightingMode != LightingModeNone); - auto tangentState = toState(_showSettings->useTangent); - - auto verticalState = toState(_buttonStack.orientation == NSUserInterfaceLayoutOrientationVertical); - auto uiState = toState(_buttonStack.hidden); - - 
_actionVertical->setHighlight(verticalState); - - // TODO: pass boolean, and change in the call - _actionPlay->setHighlight(playState); - _actionHelp->setHighlight(Off); - _actionInfo->setHighlight(Off); - _actionHud->setHighlight(hudState); - - _actionArray->setHighlight(arrayState); - _actionFace->setHighlight(faceState); - _actionMip->setHighlight(mipState); - - // these never show check state - _actionItem->setHighlight(Off); - _actionPrevItem->setHighlight(Off); - - _actionCounterpart->setHighlight(Off); - _actionPrevCounterpart->setHighlight(Off); - - _actionHideUI->setHighlight(uiState); // note below button always off, menu has state - - _actionR->setHighlight(redState); - _actionG->setHighlight(greenState); - _actionB->setHighlight(blueState); - _actionA->setHighlight(alphaState); - - _actionShowAll->setHighlight(showAllState); - _actionPreview->setHighlight(previewState); - _actionShapeMesh->setHighlight(meshState); - _actionShapeChannel->setHighlight(meshChannelState); - _actionLighting->setHighlight(lightingState); - _actionWrap->setHighlight(wrapState); - _actionGrid->setHighlight(gridState); - _actionDebug->setHighlight(debugState); - _actionTangent->setHighlight(tangentState); - - _actionPremul->setHighlight(premulState); - _actionSigned->setHighlight(signedState); - _actionChecker->setHighlight(checkerboardState); -} - -// TODO: convert to C++ actions, and then call into Base holding all this -// move pan/zoom logic too. Then use that as start of Win32 kramv. 
- - (IBAction)handleAction:(id)sender { NSEvent* theEvent = [NSApp currentEvent]; bool isShiftKeyDown = (theEvent.modifierFlags & NSEventModifierFlagShift); + void* senderPtr = (__bridge void*)sender; const Action* action = nullptr; if ([sender isKindOfClass:[NSButton class]]) { - NSButton* button = (NSButton *)sender; - for (const auto& search: _actions) { - if (search.button == button) { - action = &search; - break; - } - } + action = _data.actionFromButton(senderPtr); } else if ([sender isKindOfClass:[NSMenuItem class]]) { - NSMenuItem* menuItem = (NSMenuItem *)sender; - for (const auto& search: _actions) { - if (search.menuItem == menuItem) { - action = &search; - break; - } - } + action = _data.actionFromMenu(senderPtr); } - + if (!action) { KLOGE("kram", "unknown UI element"); return; } - + [self handleEventAction:action isShiftKeyDown:isShiftKeyDown]; } -- (void)keyDown:(NSEvent *)theEvent +- (void)flagsChanged:(NSEvent*)theEvent +{ + _modifierFlags = theEvent.modifierFlags; +} + +- (void)keyDown:(NSEvent*)theEvent { bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; uint32_t keyCode = theEvent.keyCode; @@ -2021,26 +1380,19 @@ - (void)keyDown:(NSEvent *)theEvent // for now hit esc to hide the table views if (keyCode == Key::Escape) { [self hideFileTable]; - + _hudHidden = false; [self updateHudVisibility]; return; } - - const Action* action = nullptr; - for (const auto& search: _actions) { - if (search.keyCode == keyCode) { - action = &search; - break; - } - } - + + const Action* action = _data.actionFromKey(keyCode); if (!action) { [super keyDown:theEvent]; //KLOGE("kram", "unknown UI element"); return; } - + bool isHandled = [self handleEventAction:action isShiftKeyDown:isShiftKeyDown]; if (!isHandled) { // this will bonk @@ -2070,1177 +1422,277 @@ - (void)updateHudVisibility _hudLabel2.hidden = _hudHidden || !_showSettings->isHudShown; } - - - (bool)handleEventAction:(const Action*)action isShiftKeyDown:(bool)isShiftKeyDown { - // 
Some data depends on the texture data (isSigned, isNormal, ..) - bool isChanged = false; - bool isStateChanged = false; - - // TODO: fix isChanged to only be set when value changes - // f.e. clamped values don't need to re-render - string text; - Renderer* renderer = (Renderer*)self.delegate; - - if (action == _actionVertical) { - bool isVertical = - _buttonStack.orientation == NSUserInterfaceLayoutOrientationVertical; - isVertical = !isVertical; - _buttonStack.orientation = isVertical + ActionState actionState; + if (!_data.handleEventAction(action, isShiftKeyDown, actionState)) + return false; + + // Do the leftover action work to call ObjC + if (action == _data._actionVertical) { + _buttonStack.orientation = _showSettings->isVerticalUI ? NSUserInterfaceLayoutOrientationVertical : NSUserInterfaceLayoutOrientationHorizontal; - text = isVertical ? "Vert UI" : "Horiz UI"; - - // just to update toggle state to Off - isStateChanged = true; - } - else if (action == _actionHideUI) { - // this means no image loaded yet - if (_noImageLoaded) { - return true; - } - - _buttonStack.hidden = !_buttonStack.hidden; - text = _buttonStack.hidden ? 
"Hide UI" : "Show UI"; - - // just to update toggle state to Off - isStateChanged = true; - } - - else if (action == _actionR) { - if (!action->isHidden) { - TextureChannels& channels = _showSettings->channels; - - if (channels == TextureChannels::ModeR001) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::ModeR001; - text = "Mask R001"; - } - isChanged = true; - } - - } - else if (action == _actionG) { - if (!action->isHidden) { - TextureChannels& channels = _showSettings->channels; - - if (channels == TextureChannels::Mode0G01) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::Mode0G01; - text = "Mask 0G01"; - } - isChanged = true; - } - } - else if (action == _actionB) { - if (!action->isHidden) { - TextureChannels& channels = _showSettings->channels; - - if (channels == TextureChannels::Mode00B1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::Mode00B1; - text = "Mask 00B1"; - } - - isChanged = true; - } } - else if (action == _actionA) { - if (!action->isHidden) { - TextureChannels& channels = _showSettings->channels; - - if (channels == TextureChannels::ModeAAA1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::ModeAAA1; - text = "Mask AAA1"; - } - - isChanged = true; - } - + else if (action == _data._actionHideUI) { + _buttonStack.hidden = _showSettings->isHideUI; } - else if (action == _actionPlay) { - if (!action->isHidden) { - - renderer.playAnimations = !renderer.playAnimations; - - text = renderer.playAnimations ? "Play" : "Pause"; - isChanged = true; - - //[renderer updateAnimationState:self]; - } - else { - //[renderer updateAnimationState:self]; - } - } - else if (action == _actionShapeUVPreview) { - - // toggle state - _showSettings->isUVPreview = !_showSettings->isUVPreview; - text = _showSettings->isUVPreview ? 
"Show UVPreview" : "Hide UvPreview"; - isChanged = true; - - _showSettings->uvPreviewFrames = 10; - - // also need to call this in display link, for when it reaches end - //[renderer updateAnimationState:self]; - } - - else if (action == _actionShapeChannel) { - _showSettings->advanceShapeChannel(isShiftKeyDown); - - text = _showSettings->shapeChannelText(); - isChanged = true; - } - else if (action == _actionLighting) { - _showSettings->advanceLightingMode(isShiftKeyDown); - text = _showSettings->lightingModeText(); - isChanged = true; - } - else if (action == _actionTangent) { - _showSettings->useTangent = !_showSettings->useTangent; - if (_showSettings->useTangent) - text = "Vertex Tangents"; - else - text = "Fragment Tangents"; - isChanged = true; - } - else if (action == _actionDebug) { - _showSettings->advanceDebugMode(isShiftKeyDown); - text = _showSettings->debugModeText(); - isChanged = true; - } - else if (action == _actionHelp) { - // display the chars for now - text = - "1234-rgba, Preview, Debug, A-show all\n" - "Info, Hud, Reload, 0-fit\n" - "Checker, Grid\n" - "Wrap, 8-signed, 9-premul\n" - "Mip, Face, Y-array\n" - "↓-next item, →-next counterpart\n" - "Lighting, S-shape, C-shape channel\n"; - - // just to update toggle state to Off - isStateChanged = true; - } - - else if (action == _actionFit) { - float zoom; - // fit image or mip - if (isShiftKeyDown) { - zoom = 1.0f; - } - else { - // fit to topmost image - zoom = _showSettings->zoomFit; - } - - // This zoom needs to be checked against zoom limits - // there's a cap on the zoom multiplier. - // This is reducing zoom which expands the image. 
- zoom *= 1.0f / (1 << _showSettings->mipNumber); - - // even if zoom same, still do this since it resets the pan - _showSettings->zoom = zoom; - - _showSettings->panX = 0.0f; - _showSettings->panY = 0.0f; - - text = "Scale Image\n"; - if (doPrintPanZoom) { - string tmp; - sprintf(tmp, - "Pan %.3f,%.3f\n" - "Zoom %.2fx\n", - _showSettings->panX, _showSettings->panY, _showSettings->zoom); - text += tmp; - } - - isChanged = true; - } - // reload key (also a quick way to reset the settings) - else if (action == _actionReload) { - [self loadTextureFromURL:self.imageURL]; - - // reload at actual size - if (isShiftKeyDown) { - _showSettings->zoom = 1.0f; - } - - // Name change if image - if (_showSettings->isModel) - text = "Reload Model\n"; - else - text = "Reload Image\n"; - if (doPrintPanZoom) { - string tmp; - sprintf(tmp, - "Pan %.3f,%.3f\n" - "Zoom %.2fx\n", - _showSettings->panX, _showSettings->panY, _showSettings->zoom); - text += tmp; - } - - isChanged = true; - } - // P already used for premul - else if (action == _actionPreview) { - _showSettings->isPreview = !_showSettings->isPreview; - isChanged = true; - text = "Preview "; - text += _showSettings->isPreview ? "On" : "Off"; - } - // TODO: might switch c to channel cycle, so could just hit that - // and depending on the content, it cycles through reasonable channel masks - - // toggle checkerboard for transparency - else if (action == _actionChecker) { - if (action->isHidden) { - _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; - isChanged = true; - text = "Checker "; - text += _showSettings->isCheckerboardShown ? "On" : "Off"; - } - } - - // toggle pixel grid when magnified above 1 pixel, can happen from mipmap - // changes too - else if (action == _actionGrid) { - static int grid = 0; - static const int kNumGrids = 7; - -#define advanceGrid(g, dec) \ -grid = (grid + kNumGrids + (dec ? 
-1 : 1)) % kNumGrids - - // TODO: display how many blocks there are - - // if block size is 1, then this shouldn't toggle - _showSettings->isBlockGridShown = false; - _showSettings->isAtlasGridShown = false; - _showSettings->isPixelGridShown = false; - - advanceGrid(grid, isShiftKeyDown); - - if (grid == 2 && _showSettings->blockX == 1) { - // skip it - advanceGrid(grid, isShiftKeyDown); - } - - static const uint32_t gridSizes[kNumGrids] = { - 0, 1, 2, 32, 64, 128, 256 // atlas sizes - }; - - if (grid == 0) { - sprintf(text, "Grid Off"); - } - else if (grid == 1) { - _showSettings->isPixelGridShown = true; - - sprintf(text, "Pixel Grid 1x1"); - } - else if (grid == 2) { - _showSettings->isBlockGridShown = true; - - sprintf(text, "Block Grid %dx%d", _showSettings->blockX, - _showSettings->blockY); - } - else { - _showSettings->isAtlasGridShown = true; - - // want to be able to show altases tht have long entries derived from - // props but right now just a square grid atlas - _showSettings->gridSizeX = _showSettings->gridSizeY = gridSizes[grid]; - - sprintf(text, "Atlas Grid %dx%d", _showSettings->gridSizeX, - _showSettings->gridSizeY); - } - - isChanged = true; - } - else if (action == _actionShowAll) { - if (!action->isHidden) { - // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips - _showSettings->isShowingAllLevelsAndMips = - !_showSettings->isShowingAllLevelsAndMips; - isChanged = true; - text = "Show All "; - text += _showSettings->isShowingAllLevelsAndMips ? "On" : "Off"; - } - } - - // toggle hud that shows name and pixel value under the cursor - // this may require calling setNeedsDisplay on the UILabel as cursor moves - else if (action == _actionHud) { - _showSettings->isHudShown = !_showSettings->isHudShown; + else if (action == _data._actionHud) { [self updateHudVisibility]; - // isChanged = true; - text = "Hud "; - text += _showSettings->isHudShown ? 
"On" : "Off"; - isStateChanged = true; } - - // info on the texture, could request info from lib, but would want to cache - // that info - else if (action == _actionInfo) { + else if (action == _data._actionInfo) { if (_showSettings->isHudShown) { - sprintf(text, "%s", - isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() - : _showSettings->imageInfo.c_str()); - } - // just to update toggle state to Off - isStateChanged = true; - } - - // toggle wrap/clamp - else if (action == _actionWrap) { - // TODO: cycle through all possible modes (clamp, repeat, mirror-once, - // mirror-repeat, ...) - _showSettings->isWrap = !_showSettings->isWrap; - isChanged = true; - text = "Wrap "; - text += _showSettings->isWrap ? "On" : "Off"; - } - - // toggle signed vs. unsigned - else if (action == _actionSigned) { - if (!action->isHidden) { - _showSettings->isSigned = !_showSettings->isSigned; - isChanged = true; - text = "Signed "; - text += _showSettings->isSigned ? "On" : "Off"; - } - } - - // toggle premul alpha vs. unmul - else if (action == _actionPremul) { - if (!action->isHidden) { - _showSettings->isPremul = !_showSettings->isPremul; - isChanged = true; - text = "Premul "; - text += _showSettings->isPremul ? 
"On" : "Off"; - } - } - - else if (action == _actionItem || action == _actionPrevItem) { - if (!action->isHidden) { - // invert shift key for prev, since it's reversese - if (action == _actionPrevItem) - isShiftKeyDown = !isShiftKeyDown; - - if (_showSettings->isArchive) { - if ([self advanceFileFromAchive:!isShiftKeyDown]) { - //_hudHidden = true; - //[self updateHudVisibility]; - [self setEyedropperText:""]; - - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; - } - } - else if (_showSettings->isFolder) { - if ([self advanceFileFromFolder:!isShiftKeyDown]) { - //_hudHidden = true; - //[self updateHudVisibility]; - [self setEyedropperText:""]; - - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; - } - } + // also hide the file table, since this can be long + [self hideFileTable]; } } - - else if (action == _actionCounterpart || action == _actionPrevCounterpart) { + else if (action == _data._actionPlay) { if (!action->isHidden) { - // invert shift key for prev, since it's reversese - if (action == _actionPrevCounterpart) - isShiftKeyDown = !isShiftKeyDown; - - /* Archive probably only holds one type of file, could pull in zips? - if (_showSettings->isArchive) { - if ([self advanceCounterpartFromAchive:!isShiftKeyDown]) { - _hudHidden = true; - [self updateHudVisibility]; - - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; - } - } - else */ - - /* TODO: finish this, should only cycle through counterpart files - those are files with same name but different extension under the same folder. 
- if (_showSettings->isFolder) { - if ([self advanceCounterpartFromFolder:!isShiftKeyDown]) { - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; - } - } - */ - } - } - - // test out different shapes - else if (action == _actionShapeMesh) { - if (_showSettings->meshCount > 1) { - _showSettings->advanceMeshNumber(isShiftKeyDown); - text = _showSettings->meshNumberText(); - isChanged = true; - } - } - - // TODO: should probably have these wrap and not clamp to count limits - - // mip up/down - else if (action == _actionMip) { - if (_showSettings->mipCount > 1) { - if (isShiftKeyDown) { - _showSettings->mipNumber = MAX(_showSettings->mipNumber - 1, 0); - } - else { - _showSettings->mipNumber = - MIN(_showSettings->mipNumber + 1, _showSettings->mipCount - 1); - } - sprintf(text, "Mip %d/%d", _showSettings->mipNumber, - _showSettings->mipCount); - isChanged = true; + renderer.playAnimations = _showSettings->isPlayAnimations; } } - - else if (action == _actionFace) { - // cube or cube array, but hit s to pick cubearray - if (_showSettings->faceCount > 1) { - if (isShiftKeyDown) { - _showSettings->faceNumber = MAX(_showSettings->faceNumber - 1, 0); - } - else { - _showSettings->faceNumber = - MIN(_showSettings->faceNumber + 1, _showSettings->faceCount - 1); - } - sprintf(text, "Face %d/%d", _showSettings->faceNumber, - _showSettings->faceCount); - isChanged = true; - } + else if (action == _data._actionSrgb) { + // tell the renderer to show one or other view + renderer.isToggleView = !_showSettings->isSRGBShown; } - else if (action == _actionArray) { - // slice - if (_showSettings->sliceCount > 1) { - if (isShiftKeyDown) { - _showSettings->sliceNumber = MAX(_showSettings->sliceNumber - 1, 0); - } - else { - _showSettings->sliceNumber = - MIN(_showSettings->sliceNumber + 1, _showSettings->sliceCount - 1); - } - sprintf(text, "Slice %d/%d", _showSettings->sliceNumber, - _showSettings->sliceCount); - isChanged = true; - } - // array - else if 
(_showSettings->arrayCount > 1) { - if (isShiftKeyDown) { - _showSettings->arrayNumber = MAX(_showSettings->arrayNumber - 1, 0); - } - else { - _showSettings->arrayNumber = - MIN(_showSettings->arrayNumber + 1, _showSettings->arrayCount - 1); - } - sprintf(text, "Array %d/%d", _showSettings->arrayNumber, - _showSettings->arrayCount); - isChanged = true; - } - } - else { - // non-handled action - return false; + //------------- + // Update everything + if (!actionState.hudText.empty()) { + [self setHudText:actionState.hudText.c_str()]; } - if (!text.empty()) { - [self setHudText:text.c_str()]; + if (actionState.isChanged || actionState.isStateChanged) { + _data.updateUIControlState(); } - if (isChanged || isStateChanged) { - [self updateUIControlState]; - } - - if (isChanged) { + if (actionState.isChanged) { self.needsDisplay = YES; } return true; } -// Note: docs state that drag&drop should be handled automatically by UTI setup -// via openURLs but I find these calls are needed, or it doesn't work. Maybe -// need to register for NSRUL instead of NSPasteboardTypeFileURL. For example, -// in canReadObjectForClasses had to use NSURL. 
- -// drag and drop support -- (NSDragOperation)draggingEntered:(id)sender -{ - if ((NSDragOperationGeneric & [sender draggingSourceOperationMask]) == - NSDragOperationGeneric) { - NSPasteboard *pasteboard = [sender draggingPasteboard]; - - bool canReadPasteboardObjects = - [pasteboard canReadObjectForClasses:@[ [NSURL class] ] - options:nil]; - - // don't copy dropped item, want to alias large files on disk without that - if (canReadPasteboardObjects) { - return NSDragOperationGeneric; - } - } - - // not a drag we can use - return NSDragOperationNone; -} - -- (BOOL)prepareForDragOperation:(id)sender -{ - return YES; -} - -- (BOOL)performDragOperation:(id)sender -{ - NSPasteboard* pasteboard = [sender draggingPasteboard]; - - NSString* desiredType = [pasteboard availableTypeFromArray:pasteboardTypes]; - - if ([desiredType isEqualToString:NSPasteboardTypeFileURL]) { - // TODO: use readObjects to drag multiple files onto one view - // load one mip of all those, use smaller mips for thumbnail - - // the pasteboard contains a list of filenames - NSString* urlString = - [pasteboard propertyListForType:NSPasteboardTypeFileURL]; - - // this turns it into a real path (supposedly works even with sandbox) - NSURL* url = [NSURL URLWithString:urlString]; - - // convert the original path and then back to a url, otherwise reload fails - // when this file is replaced. - const char* filename = url.fileSystemRepresentation; - if (filename == nullptr) { - KLOGE("kramv", "Fix this drop url returning nil issue"); - return NO; - } - - NSString* filenameString = [NSString stringWithUTF8String:filename]; - - url = [NSURL fileURLWithPath:filenameString]; - - if ([self loadTextureFromURL:url]) { - [self setHudText:""]; - - return YES; - } - } - - return NO; -} - -- (BOOL)loadArchive:(const char *)zipFilename -{ - _zipMmap.close(); - if (!_zipMmap.open(zipFilename)) { - return NO; - } - - // Note: if mmap fails, could read entire zip into memory - // and then still use the same code below. 
- - if (!_zip.openForRead(_zipMmap.data(), _zipMmap.dataLength())) { - return NO; - } - - // filter out unsupported extensions - vector extensions = { - ".ktx", ".ktx2", ".png", ".dds" // textures -#if USE_GLTF - , ".glb", ".gltf" // models -#endif - }; - - _zip.filterExtensions(extensions); - - // don't switch to empty archive - if (_zip.zipEntrys().empty()) { - return NO; - } - - // load the first entry in the archive - _fileArchiveIndex = 0; - - // copy names into the files view - [_tableViewController.items removeAllObjects]; - for (const auto& entry: _zip.zipEntrys()) { - const char* filenameShort = toFilenameShort(entry.filename); - [_tableViewController.items addObject: [NSString stringWithUTF8String: filenameShort]]; - } - [_tableView reloadData]; - - // set selection - [_tableView selectRowIndexes:[NSIndexSet indexSetWithIndex:_fileArchiveIndex] byExtendingSelection:NO]; - [_tableView scrollRowToVisible:_fileArchiveIndex]; - - // want it to respond to arrow keys - //[self.window makeFirstResponder: _tableView]; - - // hack to see table - [self hideFileTable]; - - return YES; -} - -- (BOOL)advanceFileFromAchive:(BOOL)increment -{ - if ((!_zipMmap.data()) || _zip.zipEntrys().empty()) { - // no archive loaded or it's empty - return NO; - } - size_t numEntries = _zip.zipEntrys().size(); - - if (increment) - _fileArchiveIndex++; - else - _fileArchiveIndex += numEntries - 1; // back 1 - - _fileArchiveIndex = _fileArchiveIndex % numEntries; - - // set selection - [_tableView selectRowIndexes:[NSIndexSet indexSetWithIndex:_fileArchiveIndex] byExtendingSelection:NO]; - [_tableView scrollRowToVisible:_fileArchiveIndex]; - - // want it to respond to arrow keys - //[self.window makeFirstResponder: _tableView]; - - // show the files table - [self showFileTable]; - - // also have to hide hud or it will obscure the visible table - //_hudHidden = true; - //[self updateHudVisibility]; - [self setEyedropperText:""]; - - return [self loadFileFromArchive]; -} - -- 
(BOOL)advanceFileFromFolder:(BOOL)increment -{ - if (_folderFiles.empty()) { - // no archive loaded - return NO; - } - - size_t numEntries = _folderFiles.size(); - if (increment) - _fileFolderIndex++; - else - _fileFolderIndex += numEntries - 1; // back 1 - - _fileFolderIndex = _fileFolderIndex % numEntries; - - // set selection - [_tableView selectRowIndexes:[NSIndexSet indexSetWithIndex:_fileFolderIndex] byExtendingSelection:NO]; - [_tableView scrollRowToVisible:_fileFolderIndex]; - - // want it to respond to arrow keys - //[self.window makeFirstResponder: _tableView]; - - // show the files table - [self showFileTable]; - - //_hudHidden = true; - //[self updateHudVisibility]; - [self setEyedropperText:""]; - - return [self loadFileFromFolder]; -} - -- (BOOL)setImageFromSelection:(NSInteger)index { - if (_zipMmap.data() && !_zip.zipEntrys().empty()) { - if (_fileArchiveIndex != index) { - _fileArchiveIndex = index; - return [self loadFileFromArchive]; - } - } - - if (!_folderFiles.empty()) { - if (_fileFolderIndex != index) { - _fileFolderIndex = index; - return [self loadFileFromFolder]; - } - } - return NO; -} - -- (BOOL)setShapeFromSelection:(NSInteger)index { - if (_showSettings->meshNumber != index) { - _showSettings->meshNumber = index; - self.needsDisplay = YES; - return YES; - } - return NO; -} - -- (BOOL)findFilenameInFolders:(const string &)filename -{ - // TODO: binary search for the filename in the array, but would have to be in - // same directory - - bool isFound = false; - for (const auto &search : _folderFiles) { - if (search == filename) { - isFound = true; - break; - } - } - return isFound; -} - - - -static string findNormalMapFromAlbedoFilename(const char* filename) -{ - string filenameShort = filename; - - const char* ext = strrchr(filename, '.'); - - auto dotPos = filenameShort.find_last_of("."); - if (dotPos == string::npos) - return ""; - - // now chop off the extension - filenameShort = filenameShort.substr(0, dotPos); - - const char* 
searches[] = { "-a", "-d" }; - - for (uint32_t i = 0; i < ArrayCount(searches); ++i) { - const char* search = searches[i]; - if (endsWith(filenameShort, search)) { - filenameShort = filenameShort.substr(0, filenameShort.length()-strlen(search)); - break; - } - } - - // may need to try various names, and see if any exist - filenameShort += "-n"; - filenameShort += ext; - - return filenameShort; -} - - -- (BOOL)loadFileFromFolder -{ - // now lookup the filename and data at that entry - const char* filename = _folderFiles[_fileFolderIndex].c_str(); - string fullFilename = filename; - auto timestamp = FileHelper::modificationTimestamp(filename); - - bool isModel = isSupportedModelFilename(filename); - if (isModel) - return [self loadModelFile:nil filename:filename]; - - // have already filtered filenames out, so this should never get hit - if (!isSupportedFilename(filename)) { - return NO; - } - - string normalFilename; - bool hasNormal = false; - - TexContentType texContentType = findContentTypeFromFilename(filename); - if (texContentType == TexContentTypeAlbedo) { - normalFilename = findNormalMapFromAlbedoFilename(filename); - - if (!normalFilename.empty()) - hasNormal = [self findFilenameInFolders:normalFilename]; - } - - //------------------------------- - - KTXImage image; - KTXImageData imageDataKTX; - - KTXImage imageNormal; - KTXImageData imageNormalDataKTX; - - // this requires decode and conversion to RGBA8u - if (!imageDataKTX.open(fullFilename.c_str(), image)) { - return NO; - } - - if (hasNormal && - imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { - // shaders only pull from albedo + normal on these texture types - if (imageNormal.textureType == image.textureType && - (imageNormal.textureType == MyMTLTextureType2D || - imageNormal.textureType == MyMTLTextureType2DArray)) { - // hasNormal = true; - } - else { - hasNormal = false; - } - } - - Renderer* renderer = (Renderer *)self.delegate; - [renderer releaseAllPendingTextures]; - - if 
(![renderer loadTextureFromImage:fullFilename.c_str() - timestamp:timestamp - image:image - imageNormal:hasNormal ? &imageNormal : nullptr - isArchive:NO]) { - return NO; - } - - //------------------------------- - - // set title to filename, chop this to just file+ext, not directory - const char* filenameShort = strrchr(filename, '/'); - if (filenameShort == nullptr) { - filenameShort = filename; - } - else { - filenameShort += 1; - } - - // was using subtitle, but that's macOS 11.0 feature. - string title = "kramv - "; - title += formatTypeName(_showSettings->originalFormat); - title += " - "; - title += filenameShort; +// Note: docs state that drag&drop should be handled automatically by UTI setup +// via openURLs but I find these calls are needed, or it doesn't work. Maybe +// need to register for NSURL instead of NSPasteboardTypeFileURL. For example, +// in canReadObjectForClasses had to use NSURL. - self.window.title = [NSString stringWithUTF8String:title.c_str()]; +// drag and drop support +- (NSDragOperation)draggingEntered:(id)sender +{ + if (([sender draggingSourceOperationMask] & NSDragOperationGeneric) == + NSDragOperationGeneric) { + NSPasteboard* pasteboard = [sender draggingPasteboard]; - // doesn't set imageURL or update the recent document menu + bool canReadPasteboardObjects = + [pasteboard canReadObjectForClasses:@[ [NSURL class] ] + options:pasteboardOptions]; - // show the controls - if (_noImageLoaded) { - _buttonStack.hidden = NO; // show controls - _noImageLoaded = NO; + // don't copy dropped item, want to alias large files on disk without that + if (canReadPasteboardObjects) { + return NSDragOperationGeneric; + } } - _showSettings->isArchive = false; - _showSettings->isFolder = true; + // not a drag we can use + return NSDragOperationNone; +} - // show/hide button - [self updateUIAfterLoad]; - - self.needsDisplay = YES; +- (BOOL)prepareForDragOperation:(id)sender +{ return YES; } +- (BOOL)performDragOperation:(id)sender +{ + NSPasteboard* 
pasteboard = [sender draggingPasteboard]; + + NSArray* urls = [pasteboard readObjectsForClasses:@[ [NSURL class] ] + options:pasteboardOptions]; + int filesCount = [urls count]; + if (filesCount > 0) { + if ([self loadTextureFromURLs:urls]) { + [self setHudText:""]; + return YES; + } + } + return NO; +} -- (BOOL)loadFileFromArchive +- (void)updateFileSelection { - // now lookup the filename and data at that entry - const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; - const char* filename = entry.filename; - string fullFilename = filename; - double timestamp = (double)entry.modificationDate; + // set selection + uint32_t fileIndex = _data._fileIndex; + [_tableView selectRowIndexes:[NSIndexSet indexSetWithIndex:fileIndex] byExtendingSelection:NO]; + [_tableView scrollRowToVisible:fileIndex]; +} - bool isModel = isSupportedModelFilename(filename); - if (isModel) - return [self loadModelFile:nil filename:filename]; - - //-------- - - if (!isSupportedFilename(filename)) { - return NO; +- (BOOL)setImageFromSelection:(NSInteger)index +{ + if (!_data._files.empty()) { + if (_data._fileIndex != index) { + _data._fileIndex = index; + return [self loadFile]; + } } - - const uint8_t* imageData = nullptr; - uint64_t imageDataLength = 0; - // search for main file - can be albedo or normal - if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { - return NO; + return NO; +} + +- (BOOL)setShapeFromSelection:(NSInteger)index +{ + if (_showSettings->meshNumber != index) { + _showSettings->meshNumber = index; + self.needsDisplay = YES; + return YES; } + return NO; +} - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - - string normalFilename; - bool hasNormal = false; +- (BOOL)loadFile +{ + if (_data._files.empty()) + return NO; - - TexContentType texContentType = findContentTypeFromFilename(filename); - if (texContentType == TexContentTypeAlbedo) { - normalFilename = findNormalMapFromAlbedoFilename(filename); - - if 
(!normalFilename.empty()) - hasNormal = _zip.extractRaw(normalFilename.c_str(), &imageNormalData, - imageNormalDataLength); - } + // lookup the filename and data at that entry + const File& file = _data._files[_data._fileIndex]; + const char* filename = file.nameShort.c_str(); - //--------------------------- + setErrorLogCapture(true); - // files in archive are just offsets into the mmap - // That's why we can't just pass filenames to the renderer - KTXImage image; - KTXImageData imageDataKTX; + bool success = _data.loadFile(); - KTXImage imageNormal; - KTXImageData imageNormalDataKTX; + // Update these settings - if (!imageDataKTX.open(imageData, imageDataLength, image)) { - return NO; - } + if (!success) { + string errorText; + getErrorLogCaptureText(errorText); + setErrorLogCapture(false); - if (hasNormal && imageNormalDataKTX.open( - imageNormalData, imageNormalDataLength, imageNormal)) { - // shaders only pull from albedo + normal on these texture types - if (imageNormal.textureType == image.textureType && - (imageNormal.textureType == MyMTLTextureType2D || - imageNormal.textureType == MyMTLTextureType2DArray)) { - // hasNormal = true; - } - else { - hasNormal = false; - } - } + string finalErrorText; + // this does have previous filename set + _data.setFailedText(file.name.c_str(), finalErrorText); + finalErrorText += errorText; - Renderer* renderer = (Renderer *)self.delegate; - [renderer releaseAllPendingTextures]; - - if (![renderer loadTextureFromImage:fullFilename.c_str() - timestamp:(double)timestamp - image:image - imageNormal:hasNormal ? 
&imageNormal : nullptr - isArchive:YES]) { + [self setHudText:finalErrorText.c_str()]; return NO; } + setErrorLogCapture(false); - //--------------------------------- + //------- + Renderer* renderer = (Renderer*)self.delegate; - // set title to filename, chop this to just file+ext, not directory - const char* filenameShort = strrchr(filename, '/'); - if (filenameShort == nullptr) { - filenameShort = filename; - } - else { - filenameShort += 1; + _showSettings->isSRGBShown = false; + if (success && renderer.hasToggleView) { + _showSettings->isSRGBShown = isSrgbFormat(_showSettings->originalFormat); } - // was using subtitle, but that's macOS 11.0 feature. - string title = "kramv - "; - title += formatTypeName(_showSettings->originalFormat); - title += " - "; - title += filenameShort; + renderer.playAnimations = _showSettings->isPlayAnimations; + renderer.isToggleView = !_showSettings->isSRGBShown; + // ------------- + string title = _showSettings->windowTitleString(filename); self.window.title = [NSString stringWithUTF8String:title.c_str()]; // doesn't set imageURL or update the recent document menu // show the controls - if (_noImageLoaded) { - _buttonStack.hidden = NO; // show controls - _noImageLoaded = NO; + if (_data._noImageLoaded) { + _showSettings->isHideUI = false; + _buttonStack.hidden = NO; // show controls + _data._noImageLoaded = false; } - _showSettings->isArchive = true; - _showSettings->isFolder = false; - // show/hide button - [self updateUIAfterLoad]; + _data.updateUIAfterLoad(); - self.needsDisplay = YES; return YES; } -- (BOOL)loadTextureFromURL:(NSURL *)url +- (void)loadFilesFromUrls:(NSArray*)urls skipSubdirs:(BOOL)skipSubdirs { - // NSLog(@"LoadTexture"); - - // turn back on the hud if was in a list view - _hudHidden = false; - [self updateHudVisibility]; - - const char* filename = url.fileSystemRepresentation; - if (filename == nullptr) { - // Fixed by converting dropped urls into paths then back to a url. 
- // When file replaced the drop url is no longer valid. - KLOGE("kramv", "Fix this load url returning nil issue"); - return NO; + // convert urls to vector for C++ + vector urlStrings; + for (NSURL* url in urls) { + urlStrings.push_back(url.fileSystemRepresentation); } - - Renderer* renderer = (Renderer *)self.delegate; - - // folders can have a . in them f.e. 2.0/blah/... - bool isDirectory = url.hasDirectoryPath; - - // this likely means it's a local file directory - if (isDirectory) { - // make list of all file in the directory - - if (!self.imageURL || (!([self.imageURL isEqualTo:url]))) { - NSDirectoryEnumerator* directoryEnumerator = - [[NSFileManager defaultManager] - enumeratorAtURL:url - includingPropertiesForKeys:[NSArray array] - options:0 - errorHandler: // nil - ^BOOL(NSURL *urlArg, NSError *error) { - macroUnusedVar(urlArg); - macroUnusedVar(error); - - // handle error - return NO; - }]; - - vector files; -#if USE_GLTF - // only display models in folder if found, ignore the png/jpg files - while (NSURL* fileOrDirectoryURL = [directoryEnumerator nextObject]) { - const char* name = fileOrDirectoryURL.fileSystemRepresentation; - - bool isModel = isSupportedModelFilename(name); - if (isModel) - { - files.push_back(name); - } - } -#endif - - // don't change to this folder if it's devoid of content - if (files.empty()) { -#if USE_GLTF - // reset the enumerator - directoryEnumerator = - [[NSFileManager defaultManager] - enumeratorAtURL:url - includingPropertiesForKeys:[NSArray array] - options:0 - errorHandler: // nil - ^BOOL(NSURL* urlArg, NSError* error) { - macroUnusedVar(urlArg); - macroUnusedVar(error); - - // handle error - return NO; - }]; -#endif - while (NSURL* fileOrDirectoryURL = [directoryEnumerator nextObject]) { - const char* name = fileOrDirectoryURL.fileSystemRepresentation; - - if (isSupportedFilename(name)) - { - files.push_back(name); - } - } - } - - if (files.empty()) { - return NO; - } - - // add it to recent docs - 
NSDocumentController* dc = - [NSDocumentController sharedDocumentController]; - [dc noteNewRecentDocumentURL:url]; - - // sort them -#if USE_EASTL - NAMESPACE_STL::quick_sort(files.begin(), files.end()); -#else - NAMESPACE_STL::sort(files.begin(), files.end()); -#endif - // replicate archive logic below - self.imageURL = url; + // C++ to build list + _data.loadFilesFromUrls(urlStrings, skipSubdirs); - // preserve old folder - string existingFilename; - if (_fileFolderIndex < (int32_t)_folderFiles.size()) - existingFilename = _folderFiles[_fileFolderIndex]; - else - _fileFolderIndex = 0; - - _folderFiles = files; + //------------------- - // TODO: preserve filename before load, and restore that index, by finding - // that name in refreshed folder list + NSMutableDictionary* attribsOff = [NSMutableDictionary dictionaryWithObjectsAndKeys: + //[NSFont systemFontOfSize:64.0],NSFontAttributeName, + [NSColor whiteColor], NSForegroundColorAttributeName, + [NSNumber numberWithFloat:-2.0], NSStrokeWidthAttributeName, + [NSColor blackColor], NSStrokeColorAttributeName, + nil]; - if (!existingFilename.empty()) { - uint32_t index = 0; - for (const auto &fileIt : _folderFiles) { - if (fileIt == existingFilename) { - break; - } - } + // add the files into the file list + [_tableViewController.items removeAllObjects]; + for (const auto& file : _data._files) { + const char* filenameShort = file.nameShort.c_str(); - _fileFolderIndex = index; - } - - [_tableViewController.items removeAllObjects]; - for (const auto& file: files) { - const char* filenameShort = toFilenameShort(file.c_str()); - [_tableViewController.items addObject: [NSString stringWithUTF8String: filenameShort]]; - } - [_tableView reloadData]; - - - [_tableView selectRowIndexes:[NSIndexSet indexSetWithIndex:_fileFolderIndex] byExtendingSelection:NO]; - [_tableView scrollRowToVisible:_fileFolderIndex]; - - [self hideFileTable]; - } + NSString* fileMenuText = [NSString stringWithUTF8String:filenameShort]; + 
NSMutableAttributedString* fileMenuStr = [[NSMutableAttributedString alloc] initWithString:fileMenuText attributes:attribsOff]; - // now load image from directory - _showSettings->isArchive = false; - _showSettings->isFolder = true; + [_tableViewController.items addObject:fileMenuStr]; + } - - // now load the file at the index - setErrorLogCapture(true); + // reloadData calls selectionDidChange which then sets _fileIndex = 0; + uint32_t fileIndex = _data._fileIndex; + [_tableView reloadData]; + _data._fileIndex = fileIndex; - BOOL success = [self loadFileFromFolder]; + [self updateFileSelection]; + [self hideFileTable]; - if (!success) { - // get back error text from the failed load - string errorText; - getErrorLogCaptureText(errorText); - setErrorLogCapture(false); + // add it to recent docs (only 10 slots) + if (urls.count == 1) { + NSDocumentController* dc = + [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:urls[0]]; + } +} - const string &folder = _folderFiles[_fileFolderIndex]; +- (BOOL)loadTextureFromURLs:(NSArray*)urls +{ + // turn back on the hud if was in a list view + _hudHidden = false; + [self updateHudVisibility]; - // prepend filename - string finalErrorText; - append_sprintf(finalErrorText, "Could not load from folder:\n %s\n", - folder.c_str()); - finalErrorText += errorText; + const char* filename = ""; + NSURL* url = urls[0]; + if ([url.scheme isEqualToString:@"kram"]) { + // the resource specifier has port and other data + // for now treat this as a local file path. - [self setHudText:finalErrorText.c_str()]; - } + // kram://filename.ktx + filename = [url.resourceSpecifier UTF8String]; + filename = filename + 2; // skip the // - setErrorLogCapture(false); - return success; + // can't get Slack to honor links like these + // with a kram:///Users/... 
+ // or with kram://~/blah + // + // Also note that loadFilesFromURLs + // also need this same treatment instead + // of relying on url.fileSystemRepresentation } + else { + filename = url.fileSystemRepresentation; + } + bool isSingleFile = urls.count == 1; - //------------------- - - if (endsWithExtension(filename, ".metallib")) { + Renderer* renderer = (Renderer*)self.delegate; + + // Handle shader hotload + if (isSingleFile && endsWithExtension(filename, ".metallib")) { if ([renderer hotloadShaders:filename]) { NSURL* metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; @@ -3255,129 +1707,20 @@ - (BOOL)loadTextureFromURL:(NSURL *)url return NO; } - // file is not a supported extension - if (!(isSupportedArchiveFilename(filename) || - isSupportedFilename(filename) || - isSupportedModelFilename(filename) - )) - { - string errorText = - "Unsupported file extension, must be .zip" -#if USE_GLTF - ", .gltf, .glb" -#endif - ", .png, .ktx, .ktx2, .dds\n"; - - string finalErrorText; - append_sprintf(finalErrorText, "Could not load from file:\n %s\n", - filename); - finalErrorText += errorText; - - [self setHudText:finalErrorText.c_str()]; - return NO; - } - - if (isSupportedModelFilename(filename)) - { - return [self loadModelFile:url filename:nullptr]; - } - - // for now, knock out model if loading an image - // TODO: might want to unload even before loading a new model - [renderer unloadModel]; - - //------------------- - - if (isSupportedArchiveFilename(filename)) { - auto archiveTimestamp = FileHelper::modificationTimestamp(filename); - - if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || - (self.lastArchiveTimestamp != archiveTimestamp)) { - // copy this out before it's replaced - string existingFilename; - if (_fileArchiveIndex < (int32_t)_zip.zipEntrys().size()) - existingFilename = _zip.zipEntrys()[_fileArchiveIndex].filename; - else - _fileArchiveIndex = 0; - - BOOL isArchiveLoaded = [self loadArchive:filename]; - if 
(!isArchiveLoaded) { - return NO; - } - - // store the archive url - self.imageURL = url; - self.lastArchiveTimestamp = archiveTimestamp; - - // add it to recent docs - NSDocumentController* dc = - [NSDocumentController sharedDocumentController]; - [dc noteNewRecentDocumentURL:url]; - - // now reload the filename if needed - if (!existingFilename.empty()) { - const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); - if (formerEntry) { - // lookup the index in the remapIndices table - _fileArchiveIndex = - (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); - } - else { - _fileArchiveIndex = 0; - } - } - } - - setErrorLogCapture(true); - - BOOL success = [self loadFileFromArchive]; - - if (!success) { - // get back error text from the failed load - string errorText; - getErrorLogCaptureText(errorText); - setErrorLogCapture(false); - - const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; - const char* archiveFilename = entry.filename; - - // prepend filename - string finalErrorText; - append_sprintf(finalErrorText, "Could not load from archive:\n %s\n", - archiveFilename); - finalErrorText += errorText; + // don't leave archive table open + if (isSingleFile) + [self hideFileTable]; - [self setHudText:finalErrorText.c_str()]; - } + // only recurse down subdirs if cmd key held during drop or recent menu item selection + bool skipSubdirs = (_modifierFlags & NSEventModifierFlagCommand) == 0; - setErrorLogCapture(false); - return success; - } + [self loadFilesFromUrls:urls skipSubdirs:skipSubdirs]; - bool success = [self loadImageFile:url]; - - // hide table in case last had archive open - if (success) - [self hideFileTable]; - + BOOL success = [self loadFile]; return success; } --(double)getTimestampForFile:(NSURL*)url -{ - // TODO: could just use FileHelper::modificationTimestamp(filename); - - NSDate* fileDate = nil; - NSError* error = nil; - [url getResourceValue:&fileDate - forKey:NSURLContentModificationDateKey - error:&error]; - - double 
timestamp = fileDate.timeIntervalSince1970; - return timestamp; -} - --(BOOL)loadModelFile:(NSURL*)url filename:(const char*)filename +- (BOOL)loadModelFile:(const char*)filename { #if USE_GLTF // Right now can only load these if they are embedded, since sandbox will @@ -3390,153 +1733,33 @@ -(BOOL)loadModelFile:(NSURL*)url filename:(const char*)filename // These assets should be combined into a single hierarchy, and be able to // save out a scene with all of them in a single scene. But that should // probably reference original content in case it's updated. - - Renderer* renderer = (Renderer *)self.delegate; + + // const char* filenameShort = toFilenameShort(filename); + //double timestamp = FileHelper::modificationTimestamp(filename); + + // TODO: this used to compare filename timestamp? + + // This code only takes url, so construct one + Renderer* renderer = (Renderer*)self.delegate; [renderer releaseAllPendingTextures]; - - setErrorLogCapture(true); + BOOL success = [renderer loadModel:filename]; - // set title to filename, chop this to just file+ext, not directory - if (url != nil) - filename = url.fileSystemRepresentation; - const char* filenameShort = toFilenameShort(filename); - - NSURL* gltfFileURL = - [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; - double timestamp = [self getTimestampForFile:gltfFileURL]; - - BOOL success = [renderer loadModel:gltfFileURL]; - // TODO: split this off to a completion handler, since loadModel is async // and should probably also have a cancellation (or counter) - + // show/hide button - [self updateUIAfterLoad]; - - if (!success) { - string errorText; - getErrorLogCaptureText(errorText); - setErrorLogCapture(false); - - string finalErrorText; - append_sprintf(finalErrorText, "Could not load model from file:\n %s\n", - filename); - finalErrorText += errorText; + _data.updateUIAfterLoad(); - [self setHudText:finalErrorText.c_str()]; - + if (!success) { return NO; } - // was using subtitle, but that's macOS 
11.0 feature. - string title = "kramv - "; - title += filenameShort; - - self.window.title = [NSString stringWithUTF8String:title.c_str()]; - - // if url is nil, then loading out of archive or folder - // and don't want to save that or set imageURL - if (url != nil) - { - // add to recent docs, so can reload quickly - NSDocumentController* dc = - [NSDocumentController sharedDocumentController]; - [dc noteNewRecentDocumentURL:gltfFileURL]; - - // TODO: not really an image - self.imageURL = gltfFileURL; - - // this may be loading out of folder/archive, but if url passed then it isn't - _showSettings->isArchive = false; - _showSettings->isFolder = false; - - // no need for file table on single files - [self hideFileTable]; - } - - // show the controls - if (_noImageLoaded) { - _buttonStack.hidden = NO; // show controls - _noImageLoaded = NO; - } - - setErrorLogCapture(false); - - // store the filename - _showSettings->lastFilename = filename; - _showSettings->lastTimestamp = timestamp; - - self.needsDisplay = YES; - return success; #else return NO; #endif } --(BOOL)loadImageFile:(NSURL*)url -{ - Renderer* renderer = (Renderer *)self.delegate; - setErrorLogCapture(true); - - // set title to filename, chop this to just file+ext, not directory - const char* filename = url.fileSystemRepresentation; - const char* filenameShort = toFilenameShort(filename); - - BOOL success = [renderer loadTexture:url]; - - if (!success) { - // get back error text from the failed load - string errorText; - getErrorLogCaptureText(errorText); - setErrorLogCapture(false); - - // prepend filename - string finalErrorText; - append_sprintf(finalErrorText, "Could not load from file\n %s\n", filename); - finalErrorText += errorText; - - [self setHudText:finalErrorText.c_str()]; - return NO; - } - setErrorLogCapture(false); - - // was using subtitle, but that's macOS 11.0 feature. 
- string title = "kramv - "; - title += formatTypeName(_showSettings->originalFormat); - title += " - "; - title += filenameShort; - - self.window.title = [NSString stringWithUTF8String:title.c_str()]; - - // topmost entry will be the recently opened document - // some entries may go stale if directories change, not sure who validates the - // list - - // add to recent document menu - NSDocumentController* dc = [NSDocumentController sharedDocumentController]; - [dc noteNewRecentDocumentURL:url]; - - self.imageURL = url; - - // show the controls - if (_noImageLoaded) { - _buttonStack.hidden = NO; // show controls - _noImageLoaded = NO; - } - - _showSettings->isArchive = false; - _showSettings->isFolder = false; - - // show/hide button - [self updateUIAfterLoad]; - // no need for file table on single files - [self hideFileTable]; - - self.needsDisplay = YES; - return YES; -} - - (void)setupUI { [self hideFileTable]; @@ -3547,42 +1770,9 @@ - (void)concludeDragOperation:(id)sender // did setNeedsDisplay, but already doing that in loadTextureFromURL } -// this doesn't seem to enable New. Was able to get "Open" to highlight by -// setting NSDocument as class for doc types. -// https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/EventArchitecture/EventArchitecture.html -#if 0 -/* -// "New"" calls this -- (__kindof NSDocument *)openUntitledDocumentAndDisplay:(BOOL)displayDocument - error:(NSError * _Nullable *)outError -{ - // TODO: this should add an empty MyMTKView and can drag/drop to that. - // Need to track images and data dropped per view then. 
- return nil; -} - -// "Open File" calls this -- (void)openDocumentWithContentsOfURL:(NSURL *)url - display:(BOOL)displayDocument - completionHandler:(void (^)(NSDocument *document, BOOL documentWasAlreadyOpen, NSError *error))completionHandler -{ - [self loadTextureFromURL:url]; -} - -- (IBAction)openDocument { - // calls openDocumentWithContentsOfURL above -} - -- (IBAction)newDocument { - // calls openUntitledDocumentAndDisplay above -} -*/ -#endif - -- (void)tableViewSelectionDidChange:(NSNotification *)notification +- (void)tableViewSelectionDidChange:(NSNotification*)notification { - if (notification.object == _tableView) - { + if (notification.object == _tableView) { // image NSInteger selectedRow = [_tableView selectedRow]; [self setImageFromSelection:selectedRow]; @@ -3593,8 +1783,9 @@ - (void)addNotifications { // listen for the selection change messages [[NSNotificationCenter defaultCenter] addObserver:self - selector:@selector(tableViewSelectionDidChange:) - name:NSTableViewSelectionDidChangeNotification object:nil]; + selector:@selector(tableViewSelectionDidChange:) + name:NSTableViewSelectionDidChangeNotification + object:nil]; } - (void)removeNotifications @@ -3603,7 +1794,6 @@ - (void)removeNotifications [[NSNotificationCenter defaultCenter] removeObserver:self]; } - - (BOOL)acceptsFirstResponder { return YES; @@ -3635,7 +1825,7 @@ - (void)viewDidLoad { [super viewDidLoad]; - _view = (MyMTKView *)self.view; + _view = (MyMTKView*)self.view; // have to disable this since reading back from textures // that slows the blit to the screen @@ -3648,9 +1838,9 @@ - (void)viewDidLoad } _renderer = [[Renderer alloc] initWithMetalKitView:_view - settings:_view.showSettings]; + settings:_view.showSettings + data:_view.data]; - // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/TrackingAreaObjects/TrackingAreaObjects.html // this is better than requesting mousemoved events, they're only sent when // cursor is inside @@ 
-3667,24 +1857,71 @@ - (void)viewDidLoad [_view addTrackingArea:_trackingArea]; [_view addNotifications]; - + [_view setupUI]; // original sample code was sending down _view.bounds.size, but need // drawableSize this was causing all sorts of inconsistencies [_renderer mtkView:_view drawableSizeWillChange:_view.drawableSize]; + // ObjC++ delegate _view.delegate = _renderer; + + [_renderer setEyedropperDelegate:_view]; } +@end +bool DataDelegate::loadFile(bool clear) +{ + MyMTKView* view_ = (__bridge MyMTKView*)view; -@end + if (clear) { + // set selection + [view_ updateFileSelection]; + + // want it to respond to arrow keys + //[self.window makeFirstResponder: _tableView]; + + // show the files table + [view_ showFileTable]; + [view_ setEyedropperText:""]; + } + + return [view_ loadFile]; +} + +bool DataDelegate::loadModelFile(const char* filename) +{ + MyMTKView* view_ = (__bridge MyMTKView*)view; + return [view_ loadModelFile:filename]; +} + +bool DataDelegate::loadTextureFromImage(const char* fullFilename, double timestamp, KTXImage& image, KTXImage* imageNormal, KTXImage* imageDiff, bool isArchive) +{ + MyMTKView* view_ = (__bridge MyMTKView*)view; + Renderer* renderer = (Renderer*)view_.delegate; + [renderer releaseAllPendingTextures]; + + if (![renderer loadTextureFromImage:fullFilename + timestamp:timestamp + image:image + imageNormal:imageNormal + imageDiff:imageDiff + isArchive:isArchive]) { + return false; + } + + return true; +} //------------- -int main(int argc, const char *argv[]) +int main(int argc, const char* argv[]) { + ThreadInfo infoMain = {"Main", ThreadPriority::Interactive, 0}; + setThreadInfo(infoMain); + @autoreleasepool { // Setup code that might create autoreleased objects goes here. } diff --git a/kramv/Shaders/KramShaders.h b/kramv/Shaders/KramShaders.h index 6e337c20..bde752e5 100644 --- a/kramv/Shaders/KramShaders.h +++ b/kramv/Shaders/KramShaders.h @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. 
- MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. @@ -8,9 +8,10 @@ #ifndef __METAL_VERSION__ #import #else +#define SIMD_NAMESPACE simd +#import #include #endif -#import #ifdef __METAL_VERSION__ #define NS_ENUM(_type, _name) \ @@ -27,6 +28,7 @@ typedef NS_ENUM(int32_t, BufferIndex) { BufferIndexUniforms = 16, BufferIndexUniformsLevel = 17, + BufferIndexUniformsDebug = 18, // for compute BufferIndexUniformsCS = 16, @@ -42,8 +44,9 @@ typedef NS_ENUM(int32_t, VertexAttribute) { typedef NS_ENUM(int32_t, TextureIndex) { TextureIndexColor = 0, TextureIndexNormal = 1, + TextureIndexDiff = 2, - TextureIndexSamples = 2, // used for compute + TextureIndexSamples = 3, // used for compute }; typedef NS_ENUM(int32_t, SamplerIndex) { @@ -109,22 +112,25 @@ typedef NS_ENUM(int32_t, ShaderLightingMode) { // TODO: placement of these elements in the struct breaks transfer // of data. This seems to work. Alignment issues with mixing these differently. 
struct Uniforms { - simd::float4x4 projectionViewMatrix; - simd::float4x4 modelMatrix; - simd::float4 modelMatrixInvScale2; // to supply inverse, w is determinant - simd::float3 cameraPosition; // world-space + SIMD_NAMESPACE::float4x4 projectionViewMatrix; + SIMD_NAMESPACE::float4x4 modelMatrix; + //SIMD_NAMESPACE::float4 modelMatrixInvScale2; // to supply inverse, w is determinant + SIMD_NAMESPACE::float3 cameraPosition; // world-space float uvPreview; + float uvToShapeRatio; bool isSigned; bool isNormal; bool isSwizzleAGToRG; - bool isPremul; + bool doShaderPremul; bool isCheckerboardShown; bool isWrap; bool isSDF; bool isPreview; // render w/lighting, normals, etc bool isUVPreview; // show uv overlay + bool isSrgbInput; + bool isDiff; bool is3DView; bool isNormalMapPreview; // for isNormal or combined @@ -171,18 +177,22 @@ struct UniformsLevel { uint32_t mipLOD; uint32_t face; uint32_t arrayOrSlice; - simd::float2 drawOffset; // pixel offset to apply - simd::float4 textureSize; // width, height, 1/width, 1/height + SIMD_NAMESPACE::float2 drawOffset; // pixel offset to apply + SIMD_NAMESPACE::float4 textureSize; // width, height, 1/width, 1/height uint32_t passNumber; // switch to enum }; // This is all tied to a single level sample struct UniformsCS { - simd::uint2 uv; + SIMD_NAMESPACE::int2 uv; uint32_t arrayOrSlice; uint32_t face; uint32_t mipLOD; }; +struct UniformsDebug { + SIMD_NAMESPACE::float4 rect; +}; + #endif diff --git a/kramv/Shaders/KramShaders.metal b/kramv/Shaders/KramShaders.metal index e5301a0c..3228c91c 100644 --- a/kramv/Shaders/KramShaders.metal +++ b/kramv/Shaders/KramShaders.metal @@ -1,4 +1,4 @@ -// kram - Copyright 2020-2022 by Alec Miller. - MIT License +// kram - Copyright 2020-2025 by Alec Miller. - MIT License // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
@@ -519,24 +519,28 @@ inline float3x3 toFloat3x3(float4x4 m) return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); } +// This works even with scale of 0 and is correct over using invT. +// The normal will be normalize anyways. Also saves sending down another tfm. +inline float3x3 adjoint(float3x3 m) +{ + return float3x3(cross(m[1], m[2]), + cross(m[2], m[0]), + cross(m[0], m[1])); +} + + // this is for vertex shader if tangent supplied void transformBasis(thread float3& normal, thread float3& tangent, - float4x4 modelToWorldTfm, float3 invScale2, bool useTangent) + float4x4 modelToWorldTfm, bool useTangent) { float3x3 m = toFloat3x3(modelToWorldTfm); - // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below - // but uniform scale already handled by normalize - normal = m * normal; - normal *= invScale2; + normal = adjoint(m) * normal; normal = normalize(normal); - // question here of whether tangent is transformed by m or mInvT - // most apps assume m, but after averaging it can be just as off the surface as the normal if (useTangent) { tangent = m * tangent; - tangent *= invScale2; tangent = normalize(tangent); } @@ -608,7 +612,6 @@ ColorInOut DrawImageFunc( uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelBitangent; float4 position = in.position; - //position.xy += uniformsLevel.drawOffset; float3 normal = in.normal; float4 tangent = in.tangent; @@ -623,7 +626,7 @@ ColorInOut DrawImageFunc( if (needsWorldBasis) { float3 t = tangent.xyz; - transformBasis(normal, t, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); + transformBasis(normal, t, uniforms.modelMatrix, uniforms.useTangent); tangent.xyz = t; out.normal = toHalf(normal); @@ -916,12 +919,48 @@ float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { // This is writing out to 16F and could write snorm data, but then that couldn't be displayed. // So code first converts to Unorm. 
+float srgbToLinear(float s) { + s = saturate(s); + //return (s < 0.04044823) ? (s / 12.92) + // : pow((s + 0.055) / 1.055, 2.4); + + return (s < 0.04044823) ? (s * ( 1.0 / 12.92 )) + : pow((s + 0.055) * ( 1.0 / 1.055 ), 2.4); + +} + +void shaderPremul(thread float4& c, bool isSrgbInput) { + float alpha = c.a; + + // This is because Figma/PS do all filering/blends in premul srgb. + // So srgbToLinear( srgb * a ) = srgbToLinear( srgb ) * srgbToLinear( a ). + // Emulating srgb blends also takes two alpha or access to dst. + // Shader would need to export srgbToLinear( 1 - a ) for dstColor, + // but that's not correct to blend alpha. Alpha still needs linear 1-a. + + // TODO: make this one of the premul options (default for png) + if (isSrgbInput) + { + // can tell difference in last 20% of TestAlphaGradient.png + //c.a = pow(c.a, 2.2); // approx + + c.a = srgbToLinear(c.a); + } + + c = toPremul(c); + + // Note: be careful fromPremul would need to do similar math + // have an alpha that is much smaller in rgb, than the blend alpha. 
+ c.a = alpha; +} + float4 DrawPixels( ColorInOut in [[stage_in]], bool facing, constant Uniforms& uniforms, float4 c, float4 nmap, + float4 cDiff, float2 textureSize, uint passNumber ) @@ -951,13 +990,13 @@ float4 DrawPixels( // adapted for signed field above, // sdf distance from edge (scalar) float dist = c.r; - + // size of one pixel line float onePixel = recip(max(0.0001, length(float2(dfdx(dist), dfdy(dist))))); - + // distance to edge in pixels (scalar) float pixelDist = dist * onePixel; - + // typically source recommends smoothstep, so that get a soft instead of hard ramp of alpha at edges // store as preml alpha @@ -968,13 +1007,13 @@ float4 DrawPixels( half4 nmapH = toHalf(c); half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN - uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN + uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); - + c.a = 1; } else { @@ -987,10 +1026,10 @@ float4 DrawPixels( if (uniforms.isNormalMapPreview) { half4 nmapH = toHalf(nmap); - + half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN - uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN + uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); } @@ -1000,8 +1039,8 @@ float4 DrawPixels( } // to premul, but also need to see without premul - if (uniforms.isPremul) { - c = toPremul(c); + if (uniforms.doShaderPremul) { + shaderPremul(c, uniforms.isSrgbInput); } } @@ -1011,6 +1050,31 @@ float4 DrawPixels( c = float4(fract(in.texCoord), 
0.0, 1.0); } } + else if (uniforms.isDiff) { + // This is only for 2d textures right now + // Could run the color reconstruct logic below. + + float4 cDelta = abs(c - cDiff); + + switch(uniforms.channels) + { + case ShModeRGBA: break; + + // with premul formats, already have ra,ga,ba + case ShModeR001: cDelta = float4(cDelta.r,0,0,0); break; + case ShMode0G01: cDelta = float4(0,cDelta.g,0,0); break; + case ShMode00B1: cDelta = float4(0,0,cDelta.b,0); break; + case ShModeAAA1: cDelta = float4(0,0,0,cDelta.a); break; + } + + const float delta = 1.0/255.0; + bool isDiffDetected = any( cDelta > delta ); + + if (isDiffDetected) { + float3 highlightColor = float3(1, 0, 1); + c.rgb = highlightColor; + } + } else { // handle single channel and SDF content if (uniforms.numChannels == 1) { @@ -1048,18 +1112,18 @@ float4 DrawPixels( // signed 1/2 channel formats return sr,0,0, and sr,sg,0 for rgb? // May want to display those as 0 not 0.5. if (uniforms.isSigned) { - // Note: premul on signed should occur while still signed, since it's a pull to zoer + // Note: premul on signed should occur while still signed, since it's a pull to zero // to premul, but also need to see without premul - if (uniforms.isPremul) { - c = toPremul(c); + if (uniforms.doShaderPremul) { + shaderPremul(c, uniforms.isSrgbInput); } sc = c; c.xyz = toUnorm(c.xyz); } else { - if (uniforms.isPremul) { - c = toPremul(c); + if (uniforms.doShaderPremul) { + shaderPremul(c, uniforms.isSrgbInput); } } @@ -1125,9 +1189,9 @@ float4 DrawPixels( else if (uniforms.shapeChannel == ShShapeChannelMipLevel) { c = toMipLevelColor(in.texCoord * textureSize.xy); // only for 2d textures } -// else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { -// c.rgb = saturate(bumpNormal); -// } + // else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { + // c.rgb = saturate(bumpNormal); + // } if (uniforms.shapeChannel != ShShapeChannelMipLevel) { c.a = 1.0; @@ -1136,20 +1200,23 @@ float4 DrawPixels( // mask to see one 
channel in isolation, this is really 0'ing out other channels // would be nice to be able to keep this set on each channel independently. - switch(uniforms.channels) + if (!uniforms.isDiff) { - case ShModeRGBA: break; - - // with premul formats, already have ra,ga,ba - case ShModeR001: c = float4(c.r,0,0,1); break; - case ShMode0G01: c = float4(0,c.g,0,1); break; - case ShMode00B1: c = float4(0,0,c.b,1); break; - -// case ShModeRRR1: c = float4(c.rrr,1); break; -// case ShModeGGG1: c = float4(c.ggg,1); break; -// case ShModeBBB1: c = float4(c.bbb,1); break; -// - case ShModeAAA1: c = float4(c.aaa,1); break; + switch(uniforms.channels) + { + case ShModeRGBA: break; + + // with premul formats, already have ra,ga,ba + case ShModeR001: c = float4(c.r,0,0,1); break; + case ShMode0G01: c = float4(0,c.g,0,1); break; + case ShMode00B1: c = float4(0,0,c.b,1); break; + + // case ShModeRRR1: c = float4(c.rrr,1); break; + // case ShModeGGG1: c = float4(c.ggg,1); break; + // case ShModeBBB1: c = float4(c.bbb,1); break; + // + case ShModeAAA1: c = float4(c.aaa,1); break; + } } // be able to pinch-zoom into/back from the image @@ -1164,8 +1231,12 @@ float4 DrawPixels( // fix that. Also make this scale with zoom. 
// https://www.geeks3d.com/hacklab/20190225/demo-checkerboard-in-glsl/ + + float2 coord = in.texCoord; + coord.x *= uniforms.uvToShapeRatio; + float repeats = 20.0; - float2 checker = floor(repeats * in.texCoord); + float2 checker = floor(repeats * coord); float selector = sign(fmod(checker.x + checker.y, 2.0)); float cb = mix(float(1), float(222.0/255.0), selector); @@ -1320,7 +1391,7 @@ fragment float4 Draw1DArrayPS( // colorMap.get_num_mip_levels(); float4 n = float4(0,0,1,1); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, c, n, c, textureSize, uniformsLevel.passNumber); } fragment float4 DrawImagePS( @@ -1330,18 +1401,20 @@ fragment float4 DrawImagePS( constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], texture2d colorMap [[ texture(TextureIndexColor) ]], - texture2d normalMap [[ texture(TextureIndexNormal) ]] + texture2d normalMap [[ texture(TextureIndexNormal) ]], + texture2d diffMap [[ texture(TextureIndexDiff) ]] ) { float4 c = colorMap.sample(colorSampler, in.texCoordXYZ.xy); float4 n = normalMap.sample(colorSampler, in.texCoordXYZ.xy); - + float4 d = diffMap.sample(colorSampler, in.texCoordXYZ.xy); + // here are the pixel dimensions of the lod uint lod = uniformsLevel.mipLOD; float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, c, n, d, textureSize, uniformsLevel.passNumber); } fragment float4 DrawImageArrayPS( @@ -1362,7 +1435,7 @@ fragment float4 DrawImageArrayPS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, 
c, n, c, textureSize, uniformsLevel.passNumber); } @@ -1384,7 +1457,7 @@ fragment float4 DrawCubePS( // colorMap.get_num_mip_levels(); float4 n = float4(0,0,1,1); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, c, n, c, textureSize, uniformsLevel.passNumber); } fragment float4 DrawCubeArrayPS( @@ -1405,7 +1478,7 @@ fragment float4 DrawCubeArrayPS( // colorMap.get_num_mip_levels(); float4 n = float4(0,0,1,1); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, c, n, c, textureSize, uniformsLevel.passNumber); } @@ -1437,12 +1510,11 @@ fragment float4 DrawVolumePS( // colorMap.get_num_mip_levels(); float4 n = float4(0,0,1,1); - return DrawPixels(in, facing, uniforms, c, n, textureSize, uniformsLevel.passNumber); + return DrawPixels(in, facing, uniforms, c, n, c, textureSize, uniformsLevel.passNumber); } //-------------------------------------------------- - /* not using this yet, need a fsq and some frag coord to sample the normal map at discrete points // https://www.shadertoy.com/view/4s23DG @@ -1561,11 +1633,11 @@ kernel void SampleImageCS( { // the for-loop is replaced with a collection of threads, each of which // calls this function. - uint2 uv = uniforms.uv; // tie into texture lookup + int2 uv = uniforms.uv; // tie into texture lookup // uv >>= uniforms.mipLOD; // the color is returned to linear rgba32f - float4 color = colorMap.read(uv, uniforms.mipLOD); + float4 color = colorMap.read((uint2)uv, uniforms.mipLOD); result.write(color, index); } @@ -1578,13 +1650,13 @@ kernel void SampleImageArrayCS( { // the for-loop is replaced with a collection of threads, each of which // calls this function. 
- uint2 uv = uniforms.uv; // tie into texture lookup + int2 uv = uniforms.uv; // tie into texture lookup //uv >>= uniforms.mipLOD; uint arrayOrSlice = uniforms.arrayOrSlice; // the color is returned to linear rgba32f - float4 color = colorMap.read(uv, arrayOrSlice, uniforms.mipLOD); + float4 color = colorMap.read((uint2)uv, arrayOrSlice, uniforms.mipLOD); result.write(color, index); } @@ -1637,7 +1709,7 @@ kernel void SampleVolumeCS( { // the for-loop is replaced with a collection of threads, each of which // calls this function. - uint3 uv = uint3(uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup + uint3 uv = uint3((uint2)uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup //uv >>= uniforms.mipLOD); // the color is returned to linear rgba32f @@ -1645,4 +1717,58 @@ kernel void SampleVolumeCS( result.write(color, index); } +//-------------------------------------------------- + +// TODO: use instancing to draw these, since to avoid diagonal +// have to use line strip and prim reset, or 4 line segs. 
+struct VertexLinesInput +{ + //float4 position [[attribute(VertexAttributePosition)]]; + float2 texCoord [[attribute(VertexAttributeTexcoord)]]; +}; + +struct VertexLinesOutput +{ + float4 position [[position]]; +}; + +vertex VertexLinesOutput DrawLinesVS( + VertexLinesInput in [[stage_in]], + constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], + constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], + constant UniformsDebug& uniformsDebug [[ buffer(BufferIndexUniformsDebug) ]]) +{ + float4 rect = uniformsDebug.rect; + + // this reverses orient when applied to meshRect, + // due to using uv + float2 pos = (rect.xy + in.texCoord * rect.zw) - float2(0.5); + pos.y = -pos.y; + + float4 worldPos = uniforms.modelMatrix * float4(pos, 0.0, 1.0); + + // try adding pixel offset to pixel values + worldPos.xy += uniformsLevel.drawOffset; + + VertexLinesOutput out; + out.position = uniforms.projectionViewMatrix * worldPos; + + // for now, to always have it show up, can't seem to bias properly + out.position.z = 1.0; + + // bias it + // use hw bias instead, but image is at 0.9993 + // and this will be at 0.9994 which is closer w/reverseZ + // out.position.z += 0.0001; + + return out; +} +fragment float4 DrawLinesPS( + VertexLinesOutput in [[stage_in]]) +{ + // TODO: could switch color for contrast on white + // if could read the underlying image color or use fb fetch.
+ + return float4(1.0); +} diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 479589bf..814aae65 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -1,28 +1,3 @@ -#cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) - -#----------------------------------------------------- - -set(BUILD_IOS FALSE) -set(BUILD_MAC FALSE) -set(BUILD_WIN FALSE) -set(BUILD_UNIX FALSE) - -if (APPLE) - if (CMAKE_SYSTEM_NAME STREQUAL "iOS") - message("build for iOS") - set(BUILD_IOS TRUE) - else() - message("build for macOS") - set(BUILD_MAC TRUE) - endif() -elseif (WIN32) - message("build for win x64") - set(BUILD_WIN TRUE) -elseif (UNIX AND NOT APPLE) - message("build for unix") - set(BUILD_UNIX TRUE) -endif() - #----------------------------------------------------- # TODO: hook these up to added code below, and pass to as command line settings @@ -43,7 +18,7 @@ set(COMPILE_SQUISH 0) set(COMPILE_ASTCENC 0) set(COMPILE_COMP 0) -if (ATE AND (BUILD_MAC OR BUILD_IOS)) +if (ATE AND BUILD_MAC) set(COMPILE_ATE 1) endif() @@ -67,10 +42,11 @@ if (COMP) set(COMPILE_COMP 1) endif() +#----------------------------------------------------- +# stl used -# this isn't an encoder, but replaces stl with eastl +# replaces parts of std/stl with eastl set(COMPILE_EASTL 0) - if (EASTL) set(COMPILE_EASTL 1) endif() @@ -101,13 +77,13 @@ else() ) endif() -set_target_properties(${myTargetLib} PROPERTIES - # turn off pch - DISABLE_PRECOMPILE_HEADERS ON -) - # add_library doesn't establish a project, so still pointing at root CMake -set(SOURCE_DIR ${PROJECT_SOURCE_DIR}/libkram) +#set(SOURCE_DIR ${PROJECT_SOURCE_DIR}/libkram) +#set(SOURCE_DIR ${CMAKE_SOURCE_DIR}/libkram) +set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +#set(INCLUDE_DIR ${CMAKE_SOURCE_DIR}/libkram) +set(INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/astc-encoder/*.cpp" @@ -123,9 +99,13 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/etc2comp/*.cpp" 
"${SOURCE_DIR}/etc2comp/*.h" + "${SOURCE_DIR}/fmt/*.cpp" + "${SOURCE_DIR}/fmt/*.h" + "${SOURCE_DIR}/heman/hedistance.cpp" "${SOURCE_DIR}/heman/hedistance.h" + "${SOURCE_DIR}/kram/*.pch" "${SOURCE_DIR}/kram/*.cpp" "${SOURCE_DIR}/kram/*.h" @@ -144,6 +124,9 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/tmpfileplus/tmpfileplus.cpp" "${SOURCE_DIR}/tmpfileplus/tmpfileplus.h" + + "${SOURCE_DIR}/vectormath/*.h" + "${SOURCE_DIR}/vectormath/*.cpp" # partial zstd decode-only unity file # cd zstd/build/single_file_libs @@ -161,10 +144,7 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS ) # no objc on win or linux -if (BUILD_WIN) - list(FILTER libSources EXCLUDE REGEX ".*ateencoder.mm$") - list(FILTER libSources EXCLUDE REGEX ".*ateencoder.h$") -elseif (BUILD_UNIX) +if (BUILD_WIN OR BUILD_LINUX) list(FILTER libSources EXCLUDE REGEX ".*ateencoder.mm$") list(FILTER libSources EXCLUDE REGEX ".*ateencoder.h$") endif() @@ -186,103 +166,116 @@ list(FILTER libSources EXCLUDE REGEX ".astcenccli_toplevel.cpp$") list(FILTER libSources EXCLUDE REGEX ".atomic.h$") list(FILTER libSources EXCLUDE REGEX ".atomic.cpp$") +# this is trying to setup C++20 module, but that causes compile failures +list(FILTER libSources EXCLUDE REGEX ".fmt.cpp$") + # this will preserve hierarchy of sources in a build project source_group(TREE "${SOURCE_DIR}" PREFIX "source" FILES ${libSources}) target_include_directories(${myTargetLib} PUBLIC - "${SOURCE_DIR}/kram/" - - # why are these public, must be in public headers - "${SOURCE_DIR}/eastl/include/" + # public since included in other project files + "${INCLUDE_DIR}/kram/" + "${INCLUDE_DIR}/eastl/include/" + "${INCLUDE_DIR}/vectormath/" ) target_include_directories(${myTargetLib} PRIVATE - "${SOURCE_DIR}/astc-encoder/" - "${SOURCE_DIR}/ate/" - "${SOURCE_DIR}/bc7enc/" - "${SOURCE_DIR}/compressonator/bc6h/" - "${SOURCE_DIR}/etc2comp/" - "${SOURCE_DIR}/heman/" - "${SOURCE_DIR}/lodepng" - "${SOURCE_DIR}/miniz/" - "${SOURCE_DIR}/squish/" - 
"${SOURCE_DIR}/tmpfileplus/" - "${SOURCE_DIR}/zstd/" - ) + "${INCLUDE_DIR}/astc-encoder/" + "${INCLUDE_DIR}/ate/" + "${INCLUDE_DIR}/bc7enc/" + "${INCLUDE_DIR}/compressonator/bc6h/" + "${INCLUDE_DIR}/etc2comp/" + "${INCLUDE_DIR}/fmt/" + "${INCLUDE_DIR}/heman/" + "${INCLUDE_DIR}/lodepng" + "${INCLUDE_DIR}/miniz/" + "${INCLUDE_DIR}/squish/" + "${INCLUDE_DIR}/tmpfileplus/" + "${INCLUDE_DIR}/zstd/" +) # only add sources to the library target_sources(${myTargetLib} PRIVATE ${libSources}) +#----------------------------------------------------- + +if (BUILD_LINUX) + # Enable all warnings, and also enable f16c sims op (only x64 though) + target_compile_options(${myTargetLib} PRIVATE -W -Wall + -mavx2 -mfma -mf16c + -fno-exceptions -fno-rtti + -fdata-sections -ffunction-sections + ) + + # this is already done by pch for libkram, but other projects need the force include inherited + # force include (public) + target_compile_options(${myTargetLib} PUBLIC -include KramConfig.h) +endif() + +# note: mac build is all done via Xcode workspace/project now, this cmake build is legacy if (BUILD_MAC) set_target_properties(${myTargetLib} PROPERTIES - # Note: match this up with CXX version - # c++11 min - XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" - XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" - - # avx1 (ignored by universal?) 
- XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" - - # turn off exceptions/rtti - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO - - # can't believe this isn't on by default in CMAKE - XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + # set debug style for apps + XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym" + XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO" ) - target_compile_options(${myTargetLib} PRIVATE -include KramConfig.h -W -Wall) - -elseif (BUILD_IOS) - set_target_properties(${myTargetLib} PROPERTIES - # Note: match this up with CXX version - # c++11 min - XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" - XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" - - # avx1 - #XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" - - # turn off exceptions/rtti - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO - XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO - - # can't believe this isn't on by default in CMAKE - XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + # Enable all warnings, and also enable f16c sims op (only x64 though) + target_compile_options(${myTargetLib} PRIVATE -W -Wall + -mavx2 -mfma -mf16c + -fobjc-arc + -fno-exceptions -fno-rtti + -fdata-sections -ffunction-sections ) - target_compile_options(${myTargetLib} PRIVATE -include KramConfig.h -W -Wall) - -elseif (BUILD_WIN) - + # this is already done by pch for libkram, but other projects need the force include inherited + # force include (public) + target_compile_options(${myTargetLib} PUBLIC -include KramConfig.h) +endif() + +if (BUILD_WIN) # TODO: switch to add_target_definitions - # to turn off exceptions/rtti use /GR and /EHsc replacement - string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set_property(TARGET ${myTargetLib} PROPERTY + MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>" + ) - # force include - target_compile_options(${myTargetLib} PRIVATE /FIKramConfig.h) - - # all 
warnings, AVX1, and multiprocess compiles - target_compile_options(${myTargetLib} PRIVATE /W3 /arch:AVX /MP) - - # fix STL - target_compile_definitions(${myTargetLib} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") - -elseif (BUILD_UNIX) - # TODO: finish this - - target_compile_options(${myTargetLib} PRIVATE -include KramConfig.h -W -Wall) + # this is already done by pch for libkram, but other projects need the force include inherited + # force include (public) + target_compile_options(${myTargetLib} PUBLIC /FIKramConfig.h) + + # all warnings, AVX2, and multiprocess compiles, + # eliminate duplicate strings, embed full path + # TODO: -fdata-sections -ffunction-sections + target_compile_options(${myTargetLib} PRIVATE /W3 + -march=haswell -mf16c -mfma + /GR- /EHs-c- + /MP /GF /FC + ) + # fix STL (don't use -D here, will remove) + target_compile_definitions(${myTargetLib} PRIVATE _ITERATOR_DEBUG_LEVEL=0 _HAS_EXCEPTIONS=0) + endif() -target_compile_definitions(${myTargetLib} PUBLIC - "-DCOMPILE_ATE=${COMPILE_ATE}" - "-DCOMPILE_BCENC=${COMPILE_BCENC}" - "-DCOMPILE_ETCENC=${COMPILE_ETCENC}" - "-DCOMPILE_SQUISH=${COMPILE_SQUISH}" - "-DCOMPILE_ASTCENC=${COMPILE_ASTCENC}" - "-DCOMPILE_COMP=${COMPILE_COMP}" - "-DCOMPILE_EASTL=${COMPILE_EASTL}" + + +# This will be force include (-include, /FI) on GCC/clang/VS. +# Can't seem to ref KramPrefix.pch file. 
Goes into cmake_pch.hxx file +target_precompile_headers(${myTargetLib} PRIVATE + ${SOURCE_DIR}/kram/KramPrefix.h +) + +# These turns into -D, don't add to the lines +target_compile_definitions(${myTargetLib} + PUBLIC + COMPILE_EASTL=${COMPILE_EASTL} + + PRIVATE + COMPILE_ATE=${COMPILE_ATE} + COMPILE_BCENC=${COMPILE_BCENC} + COMPILE_ETCENC=${COMPILE_ETCENC} + COMPILE_SQUISH=${COMPILE_SQUISH} + COMPILE_ASTCENC=${COMPILE_ASTCENC} + COMPILE_COMP=${COMPILE_COMP} ) diff --git a/libkram/allocate/dlmalloc.cpp b/libkram/allocate/dlmalloc.cpp new file mode 100644 index 00000000..4994b755 --- /dev/null +++ b/libkram/allocate/dlmalloc.cpp @@ -0,0 +1,4833 @@ +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu +*/ + +/* + C++ version by Gregory Popovitch (greg7mdp@gmail.com) and released + to the public domain as well, as explained at + http://creativecommons.org/publicdomain/zero/1.0/. + + based on Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea + (ftp://gee.cs.oswego.edu/pub/misc/malloc.c) +*/ + +/* + Further mods by Alec Miller as part of Kram. +*/ + +/* + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.6.h + You don't really need this .h file unless you call functions not + defined in your system include files. 
The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 2.7.2) supporting these.) + + Alignment: 8 bytes (minimum) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. 
+ The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. 
In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + It is also possible to limit the maximum total allocatable + space, using malloc_set_footprint_limit. This is not + designed as a security feature in itself (calls to set limits + are not screened or privileged), but may be useful as one + aspect of a secure implementation. + + Thread-safety: thread-safe unless USE_LOCKS defined to zero + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with a lock. By default, this uses a plain + pthread mutex, win32 critical section, or a spin-lock if + available for the platform and not disabled by setting + USE_SPIN_LOCKS=0. However, if USE_RECURSIVE_LOCKS is defined, + recursive versions are used instead (which are not required for + base functionality but may be needed in layered extensions). + Using a global lock is not especially fast, and can be a major + bottleneck. It is designed only to provide minimal protection + in concurrent environments, and to provide a basis for + extensions. If you are using malloc in a concurrent program, + consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived from + versions of this malloc.
+ + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) 
+ + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) 
+ + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. Normally, this requires use of locks. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. 
Note: + in cases where MSC and gcc (cygwin) are known to differ on WIN32, + conditions use _MSC_VER to distinguish them. + +DLMALLOC_EXPORT default: extern + Defines how public APIs are declared. If you want to export via a + Windows DLL, you might define this as + #define DLMALLOC_EXPORT extern __declspec(dllexport) + If you want a POSIX ELF shared object, you might use + #define DLMALLOC_EXPORT extern __attribute__((visibility("default"))) + +MALLOC_ALIGNMENT default: (size_t)(2 * sizeof(void *)) + Controls the minimum alignment for malloc'ed chunks. It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 1 (true) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and spin locks available + If true, uses custom spin locks for locking. This is currently + supported only gcc >= 4.1, older gccs on x86 platforms, and recent + MS compilers. Otherwise, posix locks or win32 critical sections are + used. + +USE_RECURSIVE_LOCKS default: not defined + If defined nonzero, uses recursive (aka reentrant) locks, otherwise + uses plain mutexes. This is not required for malloc proper, but may + be needed for layered allocators such as nedmalloc. 
+ +LOCK_AT_FORK default: not defined + If defined nonzero, performs pthread_atfork upon initialization + to initialize child lock while holding parent lock. The implementation + assumes that pthread locks (not custom locks) are being used. In other + cases, you may need to customize the implementation. + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +MALLOC_INSPECT_ALL default: NOT defined + If defined, compiles malloc_inspect_all and mspace_inspect_all, that + perform traversal of all heap space. Unless access to these + functions is otherwise restricted, you probably do not want to + include them in secure implementations. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to be bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored.
And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(), + which will usually make debugging easier. + +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems.
It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. + +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have been allocated using multiple calls + to MMAP, so long as they are adjacent. 
+ +HAVE_MREMAP default: 1 on linux, else 0 + If true realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using getSystemInfo during + initialization. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +NO_MALLOC_STATS default: 0 + If defined, don't compile "malloc_stats". This avoids calls to + fprintf and bringing in stdio dependencies you might not want. 
+ +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H LACKS_SCHED_H LACKS_TIME_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions to be called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. 
As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. 
However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. + +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. 
+*/ + +#include "dlmalloc.h" +#include + + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif +#endif // WIN32 + +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#include +#endif + +#include // for printing in malloc_stats +#include +#include // for abort() +#include // for memset etc + +#if !defined(WIN32) +#include // for MALLOC_FAILURE_ACTION +#include // for magic initialization +#include // for ffs (find first set bit in int) +#include // for mmap +#include +#include // for sbrk, sysconf +#endif + +#ifdef _MSC_VER +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* _MSC_VER */ + +namespace kram { + +#define DLTHROW throw () + +#define DLMALLOC_VERSION 20900 + + +#ifdef WIN32 +// don't have sbrk, use mmap +#define HAVE_MMAP 1 +//#define HAVE_MORECORE 0 + +#define MALLOC_FAILURE_ACTION +#define MMAP_CLEARS 1 + +#else // iOS/macOS + +// don't use sbrk on Apple, use mmap +#define HAVE_MMAP 1 +//#define HAVE_MORECORE 0 +#define MMAP_CLEARS 1 +#endif + +// 64-bit systems mostly require 16B aligned memory, also needed for SIMD +#define MALLOC_ALIGNMENT ((size_t)16U) + +// The maximum possible size_t value has all bits set +#define MAX_SIZE_T (~(size_t)0) + + +//---------------------------------------- + +#define ONLY_MSPACES 0 +#define MSPACES 1 + +//#ifndef ONLY_MSPACES +//#define ONLY_MSPACES 0 +//#endif +// +//#ifndef MSPACES +//#if ONLY_MSPACES +//#define MSPACES 1 +//#else +//#define MSPACES 0 +//#endif +//#endif + +//#ifndef MALLOC_ALIGNMENT +//#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *))) +//#endif + +#ifndef FOOTERS +#define FOOTERS 0 +#endif + +#ifndef ABORT +#define ABORT abort() +#endif + +//#ifndef ABORT_ON_ASSERT_FAILURE +//#define ABORT_ON_ASSERT_FAILURE 1 +//#endif + +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif + +// only on linux +#define HAVE_MREMAP 0 + +#ifndef INSECURE +#define INSECURE 0 +#endif + +#ifndef MALLOC_INSPECT_ALL +#define MALLOC_INSPECT_ALL 0 +#endif + +//#ifndef HAVE_MMAP 
+//#define HAVE_MMAP 1 +//#endif + +//#ifndef MMAP_CLEARS +//#define MMAP_CLEARS 1 +//#endif + +//#ifndef HAVE_MREMAP +//#ifdef linux // TODO: using linux check here +//#define HAVE_MREMAP 1 +//#ifndef _GNU_SOURCE +// #define _GNU_SOURCE /* Turns on mremap() definition */ +//#endif +//#else +//#define HAVE_MREMAP 0 +//#endif +//#endif + +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ + +//#ifndef HAVE_MORECORE +//#if ONLY_MSPACES +//#define HAVE_MORECORE 0 +//#else /* ONLY_MSPACES */ +//#define HAVE_MORECORE 1 +//#endif /* ONLY_MSPACES */ +//#endif /* HAVE_MORECORE */ +//#if !HAVE_MORECORE +//#define MORECORE_CONTIGUOUS 0 +//#else /* !HAVE_MORECORE */ +//#define MORECORE_DEFAULT sbrk +//#ifndef MORECORE_CONTIGUOUS +//#define MORECORE_CONTIGUOUS 1 +//#endif /* MORECORE_CONTIGUOUS */ +//#endif /* HAVE_MORECORE */ +// +//#ifndef DEFAULT_GRANULARITY +//#if (MORECORE_CONTIGUOUS || defined(WIN32)) +//#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +//#else /* MORECORE_CONTIGUOUS */ +//#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +//#endif /* MORECORE_CONTIGUOUS */ +//#endif /* DEFAULT_GRANULARITY */ +// +//#ifndef DEFAULT_TRIM_THRESHOLD +//#ifndef MORECORE_CANNOT_TRIM +//#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +//#else /* MORECORE_CANNOT_TRIM */ +//#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +//#endif /* MORECORE_CANNOT_TRIM */ +//#endif /* DEFAULT_TRIM_THRESHOLD */ + +// means use system page size +#define DEFAULT_GRANULARITY (0) + +// this trim is only for sbrk +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) + +// mmap +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#define MAX_RELEASE_CHECK_RATE 4095 + +//#ifndef DEFAULT_MMAP_THRESHOLD +//#if HAVE_MMAP +//#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +//#else /* HAVE_MMAP */ +//#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T 
+//#endif /* HAVE_MMAP */ +//#endif /* DEFAULT_MMAP_THRESHOLD */ +// +//#ifndef MAX_RELEASE_CHECK_RATE +//#if HAVE_MMAP +//#define MAX_RELEASE_CHECK_RATE 4095 +//#else +//#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +//#endif /* HAVE_MMAP */ +//#endif /* MAX_RELEASE_CHECK_RATE */ + +//#ifndef USE_BUILTIN_FFS +//#define USE_BUILTIN_FFS 0 +//#endif /* USE_BUILTIN_FFS */ + +//#ifndef USE_DEV_RANDOM +//#define USE_DEV_RANDOM 0 +//#endif /* USE_DEV_RANDOM */ + +//#ifndef NO_MALLINFO +//#define NO_MALLINFO 0 +//#endif /* NO_MALLINFO */ +// +//#ifndef MALLINFO_FIELD_TYPE +//#define MALLINFO_FIELD_TYPE size_t +//#endif /* MALLINFO_FIELD_TYPE */ + +#define NO_MALLOC_STATS 0 +#define NO_SEGMENT_TRAVERSAL 0 + +//#ifndef NO_MALLOC_STATS +//#define NO_MALLOC_STATS 0 +//#endif /* NO_MALLOC_STATS */ + +//#ifndef NO_SEGMENT_TRAVERSAL +//#define NO_SEGMENT_TRAVERSAL 0 +//#endif /* NO_SEGMENT_TRAVERSAL */ + +// mallopt tuning options. SVID/XPG defines four standard parameter +// numbers for mallopt, normally defined in malloc.h. None of these +// are used in this malloc, so setting them has no effect. But this +// malloc does support the following options. +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + + +// Try to persuade compilers to inline. The most critical functions for +// inlining are defined as macros, so these aren't used for them. +#ifndef FORCEINLINE +#if defined(__GNUC__) || defined(__clang__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) +#elif defined(_MSC_VER) +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE inline +#endif +#endif + +// Probably more portable call now. 
mac is including ffs() +// Declarations for bit scanning on win32 +// This already gets pulled in by windows.h +//#ifdef _MSC_VER +//#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +//extern "C" { +// unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +// unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +//} +// +//#define BitScanForward _BitScanForward +//#define BitScanReverse _BitScanReverse +//#pragma intrinsic(_BitScanForward) +//#pragma intrinsic(_BitScanReverse) +//#endif +//#endif + +// pagesize is 4K on intel, 16k on Apple Silicon, ? on android +#define malloc_getpagesize getpagesize() + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +// The bit mask value corresponding to MALLOC_ALIGNMENT +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - 1) + +// True if address p has acceptable alignment, i.e. none of its CHUNK_ALIGN_MASK bits are set. +// File-local like the other helpers here, so the generic name is not exported from namespace kram. +static FORCEINLINE bool is_aligned(const void *p) { return ((size_t)p & CHUNK_ALIGN_MASK) == 0; } + +// The number of bytes to add to address p to reach the next MALLOC_ALIGNMENT boundary +// (0 when p is already aligned). +static FORCEINLINE size_t align_offset(const void *p) +{ + return (((size_t)p & CHUNK_ALIGN_MASK) == 0) ? 0 : + ((MALLOC_ALIGNMENT - ((size_t)p & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK); +} + +/* -------------------------- MMAP preliminaries ------------------------- */ + + +// If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and +// checks to fail so compiler optimizer can delete code rather than +// using so many "#if"s. + +// MORECORE and MMAP must return MFAIL on failure +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) // defined for convenience + +#ifdef WIN32 + +// This is an mmap implementation based on VirtualAlloc on Win. +// The one in win_mmap.h is for file mmap. 
+ +// Win32 MMAP via VirtualAlloc: reserves and commits size bytes, returns MFAIL on failure +static FORCEINLINE void* win32mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +// For direct MMAP, use MEM_TOP_DOWN to minimize interference; returns MFAIL on failure +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE | MEM_COMMIT | MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? ptr: MFAIL; +} + +// This function supports releasing coalesced segments: it walks [ptr, ptr + size) one +// region at a time and releases each one. Returns 0 on success, or -1 as soon as a +// region fails the base/state/size checks or VirtualFree fails. +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define MMAP_DEFAULT(s) win32mmap(s) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) + +#else + +#define MMAP_PROT (PROT_READ | PROT_WRITE) +// Only alias MAP_ANONYMOUS to MAP_ANON when the system headers do not already provide it. +// glibc defines MAP_ANON as MAP_ANONYMOUS, so an unconditional redefinition makes the two +// macros refer to each other and MMAP_FLAGS no longer compiles. +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif +#define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) + +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#endif + +// this is linux only, macOS/iOS doesn't have this, doubt Android provides access +#if HAVE_MREMAP +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) +#else +#define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif + +// Define CALL_MORECORE +//#define CALL_MORECORE(S) MFAIL + +// Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP +#define USE_MMAP_BIT 1 +#define CALL_MMAP(s) MMAP_DEFAULT(s) +#define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) 
+#define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + + +// mstate bit set if contiguous morecore disabled or failed +#define USE_NONCONTIGUOUS_BIT (4U) + +// segment bit set in create_mspace_with_base +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope the best we can on interference. + + Per-mspace locks surround calls to malloc, free, etc. + By default, locks are simple non-reentrant mutexes. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks. Spinlocks are likely to + improve performance for lightly contended applications, but worsen + performance under heavy contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, DESTROY_LOCK, ACQUIRE_LOCK, RELEASE_LOCK + and TRY_LOCK. You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. + + */ + +// Common code for all lock types +#define USE_LOCK_BIT (2U) + +// TODO: switch to futex, os_unfair_lock, recursive_mutex not needed? 
+using MLOCK_T = std::mutex; +MLOCK_T malloc_global_mutex; +#define ACQUIRE_MALLOC_GLOBAL_LOCK() malloc_global_mutex.lock(); +#define RELEASE_MALLOC_GLOBAL_LOCK() malloc_global_mutex.unlock(); +// The per-lock macros take a pointer to an MLOCK_T +#define ACQUIRE_LOCK(lk) (lk)->lock() +#define RELEASE_LOCK(lk) (lk)->unlock() +// std::mutex has no try() member (and try is a keyword); try_lock() returns true +// when the lock was acquired +#define TRY_LOCK(lk) (lk)->try_lock() + + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. 
+ + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. 
+ + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. 
This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 3. Chunks allocated via mmap, have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + + */ + +#define PINUSE_BIT 1 +#define CINUSE_BIT 2 +#define FLAG4_BIT 4 +#define INUSE_BITS (PINUSE_BIT | CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT) + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (2 * sizeof(size_t)) +#else // FOOTERS +#define CHUNK_OVERHEAD (sizeof(size_t)) +#endif // FOOTERS + +// MMapped chunks need a second word of overhead ... +#define MMAP_CHUNK_OVERHEAD (2 * sizeof(size_t)) +// ... 
and additional padding for fake next-chunk at foot +#define MMAP_FOOT_PAD (4 * sizeof(size_t)) + +// The smallest size we can malloc is an aligned minimal chunk +#define MIN_CHUNK_SIZE ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +// =============================================================================== +struct malloc_chunk_header { + void set_size_and_pinuse_of_free_chunk(size_t s) { + _head = s | PINUSE_BIT; + set_foot(s); + } + + void set_foot(size_t s) { + ((malloc_chunk_header *)((char*)this + s))->_prev_foot = s; + } + + // extraction of fields from head words + bool cinuse() const { return !!(_head & CINUSE_BIT); } + bool pinuse() const { return !!(_head & PINUSE_BIT); } + bool flag4inuse() const { return !!(_head & FLAG4_BIT); } + bool is_inuse() const { return (_head & INUSE_BITS) != PINUSE_BIT; } + bool is_mmapped() const { return (_head & INUSE_BITS) == 0; } + + size_t chunksize() const { return _head & ~(FLAG_BITS); } + + void clear_pinuse() { _head &= ~PINUSE_BIT; } + void set_flag4() { _head |= FLAG4_BIT; } + void clear_flag4() { _head &= ~FLAG4_BIT; } + + // Treat space at ptr +/- offset as a chunk + malloc_chunk_header * chunk_plus_offset(size_t s) { + return (malloc_chunk_header *)((char*)this + s); + } + malloc_chunk_header * chunk_minus_offset(size_t s) { + return (malloc_chunk_header *)((char*)this - s); + } + + // Ptr to next or previous physical malloc_chunk. + malloc_chunk_header * next_chunk() { + return (malloc_chunk_header *)((char*)this + (_head & ~FLAG_BITS)); + } + malloc_chunk_header * prev_chunk() { + return (malloc_chunk_header *)((char*)this - (_prev_foot)); + } + + // extract next chunk's pinuse bit + size_t next_pinuse() { return next_chunk()->_head & PINUSE_BIT; } + + size_t _prev_foot; // Size of previous chunk (if free). + size_t _head; // Size and inuse bits. 
+}; + +// =============================================================================== +struct malloc_chunk : public malloc_chunk_header { + // Set size, pinuse bit, foot, and clear next pinuse + void set_free_with_pinuse(size_t s, malloc_chunk* n) + { + n->clear_pinuse(); + set_size_and_pinuse_of_free_chunk(s); + } + + // Get the internal overhead associated with chunk p + size_t overhead_for() { return is_mmapped() ? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD; } + + // Return true if malloced space is not necessarily cleared + bool calloc_must_clear() + { +#if MMAP_CLEARS + return !is_mmapped(); +#else + return true; +#endif + } + + struct malloc_chunk* _fd; // double links -- used only if free. + struct malloc_chunk* _bk; +}; + +typedef malloc_chunk mchunk; +typedef malloc_chunk* mchunkptr; +typedef malloc_chunk_header *hchunkptr; +typedef malloc_chunk* sbinptr; // The type of bins of chunks +typedef unsigned int bindex_t; // Described below +typedef unsigned int binmap_t; // Described below +typedef unsigned int flag_t; // The type of various bit flag sets + +// conversion from malloc headers to user pointers, and back +static FORCEINLINE void *chunk2mem(const void *p) { return (void *)((char *)p + 2 * sizeof(size_t)); } +static FORCEINLINE mchunkptr mem2chunk(const void *mem) { return (mchunkptr)((char *)mem - 2 * sizeof(size_t)); } + +// chunk associated with aligned address A +static FORCEINLINE mchunkptr align_as_chunk(char *A) { return (mchunkptr)(A + align_offset(chunk2mem(A))); } + +// Bounds on request (not chunk) sizes. +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - 1) + +// pad request bytes into a usable size +static FORCEINLINE size_t pad_request(size_t req) +{ + return (req + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK; +} + +// pad request, checking for minimum (but not maximum) +static FORCEINLINE size_t request2size(size_t req) +{ + return req < MIN_REQUEST ? 
MIN_CHUNK_SIZE : pad_request(req); +} + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. + */ + +// Head value for fenceposts +#define FENCEPOST_HEAD (INUSE_BITS | sizeof(size_t)) + + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | + nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. 
Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | + nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) If a chunk with the same size as an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top node) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree.
This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. + */ + +// =============================================================================== +struct malloc_tree_chunk : public malloc_chunk_header { + malloc_tree_chunk *leftmost_child() { + return _child[0] ? 
_child[0] : _child[1]; + } + + + malloc_tree_chunk* _fd; + malloc_tree_chunk* _bk; + + malloc_tree_chunk* _child[2]; + malloc_tree_chunk* _parent; + bindex_t _index; +}; + +typedef malloc_tree_chunk tchunk; +typedef malloc_tree_chunk* tchunkptr; +typedef malloc_tree_chunk* tbinptr; // The type of bins of trees + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. + + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. 
However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+ */ + +// =============================================================================== +struct malloc_segment { + bool is_mmapped_segment() { return !!(_sflags & USE_MMAP_BIT); } + bool is_extern_segment() { return !!(_sflags & EXTERN_BIT); } + + char* _base; // base address + size_t _size; // allocated size + malloc_segment* _next; // ptr to next segment + flag_t _sflags; // mmap and extern flag +}; + +typedef malloc_segment msegment; +typedef malloc_segment* msegmentptr; + +/* ------------- Malloc_params ------------------- */ + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. + */ + +// =============================================================================== +struct malloc_params { + malloc_params() : _magic(0) {} + + void ensure_initialization() + { + if (!_magic) + _init(); + } + int change(int param_number, int value); + + size_t page_align(size_t sz) { + return (sz + (_page_size - 1)) & ~(_page_size - 1); + } + + size_t granularity_align(size_t sz) { + return (sz + (_granularity - 1)) & ~(_granularity - 1); + } + + bool is_page_aligned(char *S) { + return ((size_t)S & (_page_size - 1)) == 0; + } + + int _init(); + + size_t _magic; + size_t _page_size; + size_t _granularity; + size_t _mmap_threshold; + size_t _trim_threshold; + flag_t _default_mflags; +}; + +static malloc_params mparams; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. 
The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. There are 2 equally spaced treebins for each power of two + from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap"). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even on 64-bit systems. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written.
+ + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold same value as mparams._magic. + + Max allowed footprint + The maximum allowed bytes to allocate from system (zero means no limit) + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. + */ + + +// ================================================================================ +class malloc_state +{ +public: + // ----------------------- _malloc, _free, etc... 
--- + FORCEINLINE void* _malloc(size_t bytes); + FORCEINLINE void _free(mchunkptr p); + + + // ------------------------ Relays to internal calls to malloc/free from realloc, memalign etc +#if ONLY_MSPACES + void *internal_malloc(size_t b) { return mspace_malloc(this, b); } + void internal_free(void *mem) { mspace_free(this,mem); } +#else +#if MSPACES + FORCEINLINE void *internal_malloc(size_t b); + FORCEINLINE void internal_free(void *mem); +#else + void *internal_malloc(size_t b) { return dlmalloc(b); } + void internal_free(void *mem) { dlfree(mem); } +#endif +#endif + + //------------------------ ----------------------- + + dlmallinfo internal_mallinfo(); + void internal_malloc_stats(); + + void init_top(mchunkptr p, size_t psize); + void init_bins(); + void init(char* tbase, size_t tsize); + + // ------------------------ System alloc/dealloc -------------------------- + void* sys_alloc(size_t nb); + size_t release_unused_segments(); + int sys_trim(size_t pad); + void dispose_chunk(mchunkptr p, size_t psize); + + // ----------------------- Internal support for realloc, memalign, etc --- + mchunkptr try_realloc_chunk(mchunkptr p, size_t nb, int can_move); + void* internal_memalign(size_t alignment, size_t bytes); + void** ialloc(size_t n_elements, size_t* sizes, int opts, void* chunks[]); + size_t internal_bulk_free(void* array[], size_t nelem); + void internal_inspect_all(void(*handler)(void *start, void *end, + size_t used_bytes, void* callback_arg), + void* arg); + + // -------------------------- system alloc setup (Operations on mflags) ----- + bool use_lock() const { return !!(_mflags & USE_LOCK_BIT); } + void set_lock(int l) { + _mflags = l ? 
_mflags | USE_LOCK_BIT : _mflags & ~USE_LOCK_BIT; + } + MLOCK_T& get_mutex() { return _mutex; } + + bool use_mmap() const { return !!(_mflags & USE_MMAP_BIT); } + void enable_mmap() { _mflags |= USE_MMAP_BIT; } + void disable_mmap() { _mflags &= ~USE_MMAP_BIT; } + + // ----------------------- Runtime Check Support ------------------------- + + /* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used to guarantee + that the mstate controlling malloc/free is intact. This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probabilistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes.
+ */ + + +#if !INSECURE + // Check if address a is at least as high as any from MORECORE or MMAP + bool ok_address(void *a) const { return (char *)a >= _least_addr; } + + // Check if address of next chunk n is higher than base chunk p + static bool ok_next(void *p, void *n) { return p < n; } + + // Check if p has inuse status + static bool ok_inuse(mchunkptr p) { return p->is_inuse(); } + + // Check if p has its pinuse bit on + static bool ok_pinuse(mchunkptr p) { return p->pinuse(); } + + // Check if (alleged) mstate m has expected magic field + bool ok_magic() const { return _magic == mparams._magic; } + + // In gcc, use __builtin_expect to minimize impact of checks +#if defined(__GNUC__) && __GNUC__ >= 3 + static bool rtcheck(bool e) { return __builtin_expect(e, 1); } +#else + static bool rtcheck(bool e) { return e; } +#endif +#else + static bool ok_address(void *a) { return true; } + static bool ok_next(void *p, void *n) { return true; } + static bool ok_inuse(mchunkptr p) { return true; } + static bool ok_pinuse(mchunkptr p) { return true; } + static bool ok_magic() { return true; } + static bool rtcheck(bool e) { return true; } +#endif + + bool is_initialized() const { return _top != 0; } + + bool use_noncontiguous() const { return !!(_mflags & USE_NONCONTIGUOUS_BIT); } + void disable_contiguous() { _mflags |= USE_NONCONTIGUOUS_BIT; } + + // Return segment holding given address + msegmentptr segment_holding(char* addr) const { + msegmentptr sp = (msegmentptr)&_seg; + for (;;) { + if (addr >= sp->_base && addr < sp->_base + sp->_size) + return sp; + if ((sp = sp->_next) == 0) + return 0; + } + } + + // Return true if segment contains a segment link + int has_segment_link(msegmentptr ss) const { + msegmentptr sp = (msegmentptr)&_seg; + for (;;) { + if ((char*)sp >= ss->_base && (char*)sp < ss->_base + ss->_size) + return 1; + if ((sp = sp->_next) == 0) + return 0; + } + } + +#ifndef MORECORE_CANNOT_TRIM + bool should_trim(size_t s) const { return s > 
_trim_check; } +#else + bool should_trim(size_t s) const { return false; } +#endif + + + // -------------------------- Debugging setup ---------------------------- + +#if ! DEBUG + void check_free_chunk(mchunkptr) {} + void check_inuse_chunk(mchunkptr) {} + void check_malloced_chunk(void* , size_t) {} + void check_mmapped_chunk(mchunkptr) {} + void check_malloc_state() {} + void check_top_chunk(mchunkptr) {} +#else // DEBUG + void check_free_chunk(mchunkptr p) { do_check_free_chunk(p); } + void check_inuse_chunk(mchunkptr p) { do_check_inuse_chunk(p); } + void check_malloced_chunk(void* p, size_t s) { do_check_malloced_chunk(p, s); } + void check_mmapped_chunk(mchunkptr p) { do_check_mmapped_chunk(p); } + void check_malloc_state() { do_check_malloc_state(); } + void check_top_chunk(mchunkptr p) { do_check_top_chunk(p); } + + void do_check_any_chunk(mchunkptr p) const; + void do_check_top_chunk(mchunkptr p) const; + void do_check_mmapped_chunk(mchunkptr p) const; + void do_check_inuse_chunk(mchunkptr p) const; + void do_check_free_chunk(mchunkptr p) const; + void do_check_malloced_chunk(void* mem, size_t s) const; + void do_check_tree(tchunkptr t); + void do_check_treebin(bindex_t i); + void do_check_smallbin(bindex_t i); + void do_check_malloc_state(); + int bin_find(mchunkptr x); + size_t traverse_and_check(); +#endif // DEBUG + +private: + + // ---------------------------- Indexing Bins ---------------------------- + + static bool is_small(size_t s) { return (s >> SMALLBIN_SHIFT) < NSMALLBINS; } + static bindex_t small_index(size_t s) { return (bindex_t)(s >> SMALLBIN_SHIFT); } + static size_t small_index2size(size_t i) { return i << SMALLBIN_SHIFT; } + static bindex_t MIN_SMALL_INDEX() { return small_index(MIN_CHUNK_SIZE); } + + // assign tree index for size S to variable I. 
Use x86 asm if possible +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + FORCEINLINE static bindex_t compute_tree_index(size_t S) + { + size_t X = S >> TREEBIN_SHIFT; // size_t (as in the other variants): an unsigned int here drops the high bits of 64-bit sizes, so e.g. 2^40 looked like X == 0 and landed in bin 0 + if (X == 0) + return 0; + else if (X > 0xFFFF) + return NTREEBINS - 1; + + unsigned int K = (unsigned) sizeof(unsigned int)*__CHAR_BIT__ - 1 - (unsigned) __builtin_clz((unsigned int)X); // X is in [1, 0xFFFF] here, so the narrowing cast is lossless; K = index of highest set bit + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); + } + +#elif defined (__INTEL_COMPILER) + FORCEINLINE static bindex_t compute_tree_index(size_t S) + { + size_t X = S >> TREEBIN_SHIFT; + if (X == 0) + return 0; + else if (X > 0xFFFF) + return NTREEBINS - 1; + + unsigned int K = _bit_scan_reverse (X); + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); + } + +#elif defined(_MSC_VER) && _MSC_VER>=1300 + FORCEINLINE static bindex_t compute_tree_index(size_t S) + { + size_t X = S >> TREEBIN_SHIFT; + if (X == 0) + return 0; + else if (X > 0xFFFF) + return NTREEBINS - 1; + + unsigned int K; + _BitScanReverse((DWORD *) &K, (DWORD) X); + return (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); + } + +#else // GNUC + FORCEINLINE static bindex_t compute_tree_index(size_t S) + { + size_t X = S >> TREEBIN_SHIFT; + if (X == 0) + return 0; + else if (X > 0xFFFF) + return NTREEBINS - 1; + + unsigned int Y = (unsigned int)X; + unsigned int N = ((Y - 0x100) >> 16) & 8; + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4; + N += K; + N += K = (((Y <<= K) - 0x4000) >> 16) & 2; + K = 14 - N + ((Y <<= K) >> 15); + return (K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)); + } +#endif // GNUC + + // Shift placing maximum resolved bit in a treebin at i as sign bit + static bindex_t leftshift_for_tree_index(bindex_t i) + { + return (i == NTREEBINS - 1) ?
0 : + ((SIZE_T_BITSIZE - 1) - ((i >> 1) + TREEBIN_SHIFT - 2)); + } + + // The size of the smallest chunk held in bin with index i + static bindex_t minsize_for_tree_index(bindex_t i) + { + return ((size_t)1 << ((i >> 1) + TREEBIN_SHIFT)) | + (((size_t)(i & 1)) << ((i >> 1) + TREEBIN_SHIFT - 1)); + } + + + // ----------- isolate the least set bit of a bitmap + static binmap_t least_bit(binmap_t x) { return x & -x; } + + // ----------- mask with all bits to left of least bit of x on + static binmap_t left_bits(binmap_t x) { return (x<<1) | -(x<<1); } + + // index corresponding to given bit. Use x86 asm if possible +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + static bindex_t compute_bit2idx(binmap_t X) + { + unsigned int J; + J = __builtin_ctz(X); + return (bindex_t)J; + } + +#elif defined (__INTEL_COMPILER) + static bindex_t compute_bit2idx(binmap_t X) + { + unsigned int J; + J = _bit_scan_forward (X); + return (bindex_t)J; + } + +#elif defined(_MSC_VER) && _MSC_VER>=1300 + static bindex_t compute_bit2idx(binmap_t X) + { + unsigned int J; + _BitScanForward((DWORD *) &J, X); + return (bindex_t)J; + } + + //#elif USE_BUILTIN_FFS + // static bindex_t compute_bit2idx(binmap_t X) { return ffs(X) - 1; } + +#else + static bindex_t compute_bit2idx(binmap_t X) + { + unsigned int Y = X - 1; + unsigned int K = Y >> (16-4) & 16; + unsigned int N = K; Y >>= K; + N += K = Y >> (8-3) & 8; Y >>= K; + N += K = Y >> (4-2) & 4; Y >>= K; + N += K = Y >> (2-1) & 2; Y >>= K; + N += K = Y >> (1-0) & 1; Y >>= K; + return (bindex_t)(N + Y); + } +#endif // GNUC + + // ------------------------ Set up inuse chunks with or without footers --- +#if !FOOTERS + void mark_inuse_foot(malloc_chunk_header *, size_t) {} +#else + // Set foot of inuse chunk (the _prev_foot field of the chunk s bytes after p) to be xor of mstate and seed + void mark_inuse_foot(malloc_chunk_header *p, size_t s) { + (((mchunkptr)((char*)p + s))->_prev_foot = (size_t)this ^ mparams._magic); } +#endif + + void set_inuse(malloc_chunk_header *p, size_t s)
{ + p->_head = (p->_head & PINUSE_BIT) | s | CINUSE_BIT; + ((mchunkptr)(((char*)p) + s))->_head |= PINUSE_BIT; + mark_inuse_foot(p,s); + } + + void set_inuse_and_pinuse(malloc_chunk_header *p, size_t s) { + p->_head = s | PINUSE_BIT | CINUSE_BIT; + ((mchunkptr)(((char*)p) + s))->_head |= PINUSE_BIT; + mark_inuse_foot(p,s); + } + + void set_size_and_pinuse_of_inuse_chunk(malloc_chunk_header *p, size_t s) { + p->_head = s | PINUSE_BIT | CINUSE_BIT; + mark_inuse_foot(p, s); + } + + // ------------------------ Addressing by index. See about smallbin repositioning --- + sbinptr smallbin_at(bindex_t i) const { return (sbinptr)((char*)&_smallbins[i << 1]); } + tbinptr* treebin_at(bindex_t i) { return &_treebins[i]; } + + // ----------------------- bit corresponding to given index --------- + static binmap_t idx2bit(bindex_t i) { return ((binmap_t)1 << i); } + + // --------------- Mark/Clear bits with given index + void mark_smallmap(bindex_t i) { _smallmap |= idx2bit(i); } + void clear_smallmap(bindex_t i) { _smallmap &= ~idx2bit(i); } + binmap_t smallmap_is_marked(bindex_t i) const { return _smallmap & idx2bit(i); } + + void mark_treemap(bindex_t i) { _treemap |= idx2bit(i); } + void clear_treemap(bindex_t i) { _treemap &= ~idx2bit(i); } + binmap_t treemap_is_marked(bindex_t i) const { return _treemap & idx2bit(i); } + + // ------------------------ ----------------------- + FORCEINLINE void insert_small_chunk(mchunkptr P, size_t S); + FORCEINLINE void unlink_small_chunk(mchunkptr P, size_t S); + FORCEINLINE void unlink_first_small_chunk(mchunkptr B, mchunkptr P, bindex_t I); + FORCEINLINE void replace_dv(mchunkptr P, size_t S); + + // ------------------------- Operations on trees ------------------------- + FORCEINLINE void insert_large_chunk(tchunkptr X, size_t S); + FORCEINLINE void unlink_large_chunk(tchunkptr X); + + // ------------------------ Relays to large vs small bin operations + FORCEINLINE void insert_chunk(mchunkptr P, size_t S); + FORCEINLINE void 
unlink_chunk(mchunkptr P, size_t S); + + // ----------------------- Direct-mmapping chunks ----------------------- + void* mmap_alloc(size_t nb); + mchunkptr mmap_resize(mchunkptr oldp, size_t nb, int flags); + + void reset_on_error(); + void* prepend_alloc(char* newbase, char* oldbase, size_t nb); + void add_segment(char* tbase, size_t tsize, flag_t mmapped); + + // ------------------------ malloc --------------------------- + void* tmalloc_large(size_t nb); + void* tmalloc_small(size_t nb); + + // ------------------------Bin types, widths and sizes -------- + static const size_t NSMALLBINS = 32; + static const size_t NTREEBINS = 32; + static const size_t SMALLBIN_SHIFT = 3; + static const size_t SMALLBIN_WIDTH = 1 << SMALLBIN_SHIFT; + static const size_t TREEBIN_SHIFT = 8; + static const size_t MIN_LARGE_SIZE = 1 << TREEBIN_SHIFT; + static const size_t MAX_SMALL_SIZE = (MIN_LARGE_SIZE - 1); + static const size_t MAX_SMALL_REQUEST = (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD); + + // ------------------------ data members --------------------------- + binmap_t _smallmap; + binmap_t _treemap; + size_t _dvsize; + size_t _topsize; + char* _least_addr; + mchunkptr _dv; + mchunkptr _top; + size_t _trim_check; + size_t _release_checks; + size_t _magic; + mchunkptr _smallbins[(NSMALLBINS+1)*2]; + tbinptr _treebins[NTREEBINS]; +public: + size_t _footprint; + size_t _max_footprint; + size_t _footprint_limit; // zero means no limit + flag_t _mflags; + + msegment _seg; + + // This prevents memset, since it's an object + MLOCK_T _mutex; // locate lock among fields that rarely change + +private: + void* _extp; // Unused but available for extensions + size_t _exts; +}; + +// ------------- end malloc_state ------------------- + +#if FOOTERS +malloc_state* get_mstate_for(malloc_chunk_header *p) { + return (malloc_state*)(((mchunkptr)((char*)(p) + + (p->chunksize())))->_prev_foot ^ mparams._magic); // the footer lives in the following chunk's _prev_foot; xor with the magic again to recover the mstate pointer +} +#endif + +// ------------- Global malloc_state ------------------- +
+#if !ONLY_MSPACES + +// The global malloc_state used for all non-"mspace" calls +static malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif // !ONLY_MSPACES + +// -------------------------- system alloc setup ------------------------- + + + +// For mmap, use granularity alignment on windows, else page-align +#ifdef WIN32 +#define mmap_align(S) mparams.granularity_align(S) +#else +#define mmap_align(S) mparams.page_align(S) +#endif + +// For sys_alloc, enough padding to ensure can malloc request on success +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + + +// True if segment S holds address A +bool segment_holds(msegmentptr S, mchunkptr A) { + return (char*)A >= S->_base && (char*)A < S->_base + S->_size; +} + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. + */ +#define TOP_FOOT_SIZE \ +(align_offset(chunk2mem((void *)0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +// ------------------------------- Hooks -------------------------------- + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. If you are not using locking, you can redefine these to do + anything you like. + */ + +#define PREACTION(M) ACQUIRE_LOCK(&(M)->get_mutex()) +#define POSTACTION(M) RELEASE_LOCK(&(M)->get_mutex()) + + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. + USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. 
+ */ + +#if PROCEED_ON_ERROR + +// A count of the number of corruption errors causing resets +int malloc_corruption_error_count; + +#define CORRUPTION_ERROR_ACTION(m) m->reset_on_error() +#define USAGE_ERROR_ACTION(m, p) + +#else // PROCEED_ON_ERROR + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif // CORRUPTION_ERROR_ACTION + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif // USAGE_ERROR_ACTION + +#endif // PROCEED_ON_ERROR + +// ---------------------------- setting mparams -------------------------- + +//#if LOCK_AT_FORK +// static void pre_fork(void) { ACQUIRE_LOCK(&(gm)->get_mutex()); } +// static void post_fork_parent(void) { RELEASE_LOCK(&(gm)->get_mutex()); } +// static void post_fork_child(void) { INITIAL_LOCK(&(gm)->get_mutex()); } +//#endif // LOCK_AT_FORK + +// Initialize mparams +int malloc_params::_init() { + //#ifdef NEED_GLOBAL_LOCK_INIT + // if (malloc_global_mutex_status <= 0) + // init_malloc_global_mutex(); + //#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (_magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); +#else // WIN32 + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); + } +#endif // WIN32 + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. 
+ */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-1)) != 0) || + ((gsize & (gsize-1)) != 0) || + ((psize & (psize-1)) != 0)) + ABORT; + _granularity = gsize; + _page_size = psize; + _mmap_threshold = DEFAULT_MMAP_THRESHOLD; + _trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + _default_mflags = USE_LOCK_BIT | USE_MMAP_BIT; +#else // MORECORE_CONTIGUOUS + _default_mflags = USE_LOCK_BIT | USE_MMAP_BIT | USE_NONCONTIGUOUS_BIT; +#endif // MORECORE_CONTIGUOUS + +#if !ONLY_MSPACES + // Set up lock for main malloc area + gm->_mflags = _default_mflags; + //(void)INITIAL_LOCK(&gm->get_mutex()); +#endif + //#if LOCK_AT_FORK + // pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child); + //#endif + + { + //#if USE_DEV_RANDOM + // int fd; + // unsigned char buf[sizeof(size_t)]; + // // Try to use /dev/urandom, else fall back on using time + // if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + // read(fd, buf, sizeof(buf)) == sizeof(buf)) { + // magic = *((size_t *) buf); + // close(fd); + // } + // else + //#endif // USE_DEV_RANDOM + { +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); + //#elif defined(LACKS_TIME_H) + // magic = (size_t)&magic ^ (size_t)0x55555555U; +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + } + magic |= (size_t)8U; // ensure nonzero + magic &= ~(size_t)7U; // improve chances of fault for bad values + // Until memory modes commonly available, use volatile-write + (*(volatile size_t *)(&(_magic))) = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +} + +// support for mallopt +int malloc_params::change(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? 
MAX_SIZE_T : (size_t)value; + + switch(param_number) { + case M_TRIM_THRESHOLD: + _trim_threshold = val; + return 1; + + case M_GRANULARITY: + if (val >= _page_size && ((val & (val - 1)) == 0)) { + _granularity = val; + return 1; + } + else + return 0; + + case M_MMAP_THRESHOLD: + _mmap_threshold = val; + return 1; + + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +// Check properties of any chunk, whether free, inuse, mmapped etc +void malloc_state::do_check_any_chunk(mchunkptr p) const { + assert((is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); +} + +// Check properties of top chunk +void malloc_state::do_check_top_chunk(mchunkptr p) const { + msegmentptr sp = segment_holding((char*)p); + size_t sz = p->_head & ~INUSE_BITS; // third-lowest bit can be set! + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); + assert(sz == _topsize); + assert(sz > 0); + assert(sz == ((sp->_base + sp->_size) - (char*)p) - TOP_FOOT_SIZE); + assert(p->pinuse()); + assert(!p->chunk_plus_offset(sz)->pinuse()); +} + +// Check properties of (inuse) mmapped chunks +void malloc_state::do_check_mmapped_chunk(mchunkptr p) const { + size_t sz = p->chunksize(); + size_t len = (sz + (p->_prev_foot) + MMAP_FOOT_PAD); + assert(p->is_mmapped()); + assert(use_mmap()); + assert((is_aligned(chunk2mem(p))) || (p->_head == FENCEPOST_HEAD)); + assert(ok_address(p)); + assert(!is_small(sz)); + assert((len & (mparams._page_size - 1)) == 0); + assert(p->chunk_plus_offset(sz)->_head == FENCEPOST_HEAD); + assert(p->chunk_plus_offset(sz+sizeof(size_t))->_head == 0); +} + +// Check properties of inuse chunks +void malloc_state::do_check_inuse_chunk(mchunkptr p) const { + do_check_any_chunk(p); + assert(p->is_inuse()); + assert(p->next_pinuse()); + // If not pinuse and not mmapped, previous chunk has OK offset + assert(p->is_mmapped() || 
p->pinuse() || (mchunkptr)p->prev_chunk()->next_chunk() == p); + if (p->is_mmapped()) + do_check_mmapped_chunk(p); +} + +// Check properties of free chunks +void malloc_state::do_check_free_chunk(mchunkptr p) const { + size_t sz = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(sz); + do_check_any_chunk(p); + assert(!p->is_inuse()); + assert(!p->next_pinuse()); + assert (!p->is_mmapped()); + if (p != _dv && p != _top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->_prev_foot == sz); + assert(p->pinuse()); + assert (next == _top || next->is_inuse()); + assert(p->_fd->_bk == p); + assert(p->_bk->_fd == p); + } + else // markers are always of size sizeof(size_t) + assert(sz == sizeof(size_t)); + } +} + +// Check properties of malloced chunks at the point they are malloced +void malloc_state::do_check_malloced_chunk(void* mem, size_t s) const { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->_head & ~INUSE_BITS; + do_check_inuse_chunk(p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + // unless mmapped, size is less than MIN_CHUNK_SIZE more than request + assert(p->is_mmapped() || sz < (s + MIN_CHUNK_SIZE)); + } +} + +// Check a tree and its subtrees. 
+void malloc_state::do_check_tree(tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->_index; + size_t tsize = t->chunksize(); + bindex_t idx = compute_tree_index(tsize); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS - 1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { + // traverse through chain of same-sized nodes + do_check_any_chunk((mchunkptr)u); + assert(u->_index == tindex); + assert(u->chunksize() == tsize); + assert(!u->is_inuse()); + assert(!u->next_pinuse()); + assert(u->_fd->_bk == u); + assert(u->_bk->_fd == u); + if (u->_parent == 0) { + assert(u->_child[0] == 0); + assert(u->_child[1] == 0); + } + else { + assert(head == 0); // only one node on chain has parent + head = u; + assert(u->_parent != u); + assert (u->_parent->_child[0] == u || + u->_parent->_child[1] == u || + *((tbinptr*)(u->_parent)) == u); + if (u->_child[0] != 0) { + assert(u->_child[0]->_parent == u); + assert(u->_child[0] != u); + do_check_tree(u->_child[0]); + } + if (u->_child[1] != 0) { + assert(u->_child[1]->_parent == u); + assert(u->_child[1] != u); + do_check_tree(u->_child[1]); + } + if (u->_child[0] != 0 && u->_child[1] != 0) { + assert(u->_child[0]->chunksize() < u->_child[1]->chunksize()); + } + } + u = u->_fd; + } while (u != t); + assert(head != 0); +} + +// Check all the chunks in a treebin. +void malloc_state::do_check_treebin(bindex_t i) { + tbinptr* tb = (tbinptr*)treebin_at(i); + tchunkptr t = *tb; + int empty = (_treemap & (1U << i)) == 0; + if (t == 0) + assert(empty); + if (!empty) + do_check_tree(t); +} + +// Check all the chunks in a smallbin. 
+void malloc_state::do_check_smallbin(bindex_t i) { + sbinptr b = smallbin_at(i); + mchunkptr p = b->_bk; + unsigned int empty = (_smallmap & (1U << i)) == 0; + if (p == b) + assert(empty); + if (!empty) { + for (; p != b; p = p->_bk) { + size_t size = p->chunksize(); + mchunkptr q; + // each chunk claims to be free + do_check_free_chunk(p); + // chunk belongs in bin + assert(small_index(size) == i); + assert(p->_bk == b || p->_bk->chunksize() == p->chunksize()); + // chunk is followed by an inuse chunk + q = (mchunkptr)p->next_chunk(); + if (q->_head != FENCEPOST_HEAD) + do_check_inuse_chunk(q); + } + } +} + +// Find x in a bin. Used in other check functions. +int malloc_state::bin_find(mchunkptr x) { + size_t size = x->chunksize(); + if (is_small(size)) { + bindex_t sidx = small_index(size); + sbinptr b = smallbin_at(sidx); + if (smallmap_is_marked(sidx)) { + mchunkptr p = b; + do { + if (p == x) + return 1; + } while ((p = p->_fd) != b); + } + } + else { + bindex_t tidx = compute_tree_index(size); + if (treemap_is_marked(tidx)) { + tchunkptr t = *treebin_at(tidx); + size_t sizebits = size << leftshift_for_tree_index(tidx); + while (t != 0 && t->chunksize() != size) { + t = t->_child[(sizebits >> (SIZE_T_BITSIZE - 1)) & 1]; + sizebits <<= 1; + } + if (t != 0) { + tchunkptr u = t; + do { + if (u == (tchunkptr)x) + return 1; + } while ((u = u->_fd) != t); + } + } + } + return 0; +} + +// Traverse each chunk and check it; return total +size_t malloc_state::traverse_and_check() { + size_t sum = 0; + if (is_initialized()) { + msegmentptr s = (msegmentptr)&_seg; + sum += _topsize + TOP_FOOT_SIZE; + while (s != 0) { + mchunkptr q = align_as_chunk(s->_base); + mchunkptr lastq = 0; + assert(q->pinuse()); + while (segment_holds(s, q) && + q != _top && q->_head != FENCEPOST_HEAD) { + sum += q->chunksize(); + if (q->is_inuse()) { + assert(!bin_find(q)); + do_check_inuse_chunk(q); + } + else { + assert(q == _dv || bin_find(q)); + assert(lastq == 0 || lastq->is_inuse()); // 
Not 2 consecutive free + do_check_free_chunk(q); + } + lastq = q; + q = (mchunkptr)q->next_chunk(); + } + s = s->_next; + } + } + return sum; +} + + +// Check all properties of malloc_state. +void malloc_state::do_check_malloc_state() { + bindex_t i; + size_t total; + // check bins + for (i = 0; i < NSMALLBINS; ++i) + do_check_smallbin(i); + for (i = 0; i < NTREEBINS; ++i) + do_check_treebin(i); + + if (_dvsize != 0) { + // check dv chunk + do_check_any_chunk(_dv); + assert(_dvsize == _dv->chunksize()); + assert(_dvsize >= MIN_CHUNK_SIZE); + assert(bin_find(_dv) == 0); + } + + if (_top != 0) { + // check top chunk + do_check_top_chunk(_top); + //assert(topsize == top->chunksize()); redundant + assert(_topsize > 0); + assert(bin_find(_top) == 0); + } + + total = traverse_and_check(); + assert(total <= _footprint); + assert(_footprint <= _max_footprint); +} +#endif // DEBUG + +/* ----------------------------- statistics ------------------------------ */ + +// =============================================================================== +dlmallinfo malloc_state::internal_mallinfo() { + dlmallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mparams.ensure_initialization(); + { + PREACTION(this); + check_malloc_state(); + if (is_initialized()) { + size_t nfree = 1; // top always free + size_t mfree = _topsize + TOP_FOOT_SIZE; + size_t sum = mfree; + msegmentptr s = &_seg; + while (s != 0) { + mchunkptr q = align_as_chunk(s->_base); + while (segment_holds(s, q) && + q != _top && q->_head != FENCEPOST_HEAD) { + size_t sz = q->chunksize(); + sum += sz; + if (!q->is_inuse()) { + mfree += sz; + ++nfree; + } + q = (mchunkptr)q->next_chunk(); + } + s = s->_next; + } + + nm.arena = sum; + nm.ordblks = nfree; + nm.hblkhd = _footprint - sum; + nm.usmblks = _max_footprint; + nm.uordblks = _footprint - mfree; + nm.fordblks = mfree; + nm.keepcost = _topsize; + } + + POSTACTION(this); + } + return nm; +} + +#if !NO_MALLOC_STATS +void malloc_state::internal_malloc_stats() { + 
mparams.ensure_initialization(); + { + PREACTION(this); + size_t maxfp = 0; + size_t fp = 0; + size_t used = 0; + check_malloc_state(); + if (is_initialized()) { + msegmentptr s = &_seg; + maxfp = _max_footprint; + fp = _footprint; + used = fp - (_topsize + TOP_FOOT_SIZE); + + while (s != 0) { + mchunkptr q = align_as_chunk(s->_base); + while (segment_holds(s, q) && + q != _top && q->_head != FENCEPOST_HEAD) { + if (!q->is_inuse()) + used -= q->chunksize(); + q = (mchunkptr)q->next_chunk(); + } + s = s->_next; + } + } + POSTACTION(this); // drop lock + + // fprintf doesn't workon Win/Android gui apps + // also weird to log info to stderr just to avoid buffering + fprintf(stdout, "max system bytes = %10lu\n" + "system bytes = %10lu\n" + "in use bytes = %10lu\n", + (unsigned long)(maxfp), + (unsigned long)(fp), + (unsigned long)(used)); + } +} +#endif // NO_MALLOC_STATS + +/* ----------------------- Operations on smallbins ----------------------- */ + +/* + Various forms of linking and unlinking are defined as macros. Even + the ones for trees, which are very long but have very short typical + paths. This is ugly but reduces reliance on inlining support of + compilers. 
+ */ + +// Link a free chunk into a smallbin +void malloc_state::insert_small_chunk(mchunkptr p, size_t s) { + bindex_t I = small_index(s); + mchunkptr B = smallbin_at(I); + mchunkptr F = B; + assert(s >= MIN_CHUNK_SIZE); + if (!smallmap_is_marked(I)) + mark_smallmap(I); + else if (rtcheck(ok_address(B->_fd))) + F = B->_fd; + else { + CORRUPTION_ERROR_ACTION(this); + } + B->_fd = p; + F->_bk = p; + p->_fd = F; + p->_bk = B; +} + +// Unlink a chunk from a smallbin +void malloc_state::unlink_small_chunk(mchunkptr p, size_t s) { + mchunkptr F = p->_fd; + mchunkptr B = p->_bk; + bindex_t I = small_index(s); + assert(p != B); + assert(p != F); + assert(p->chunksize() == small_index2size(I)); + if (rtcheck(F == smallbin_at(I) || (ok_address(F) && F->_bk == p))) { + if (B == F) { + clear_smallmap(I); + } + else if (rtcheck(B == smallbin_at(I) || + (ok_address(B) && B->_fd == p))) { + F->_bk = B; + B->_fd = F; + } + else { + CORRUPTION_ERROR_ACTION(this); + } + } + else { + CORRUPTION_ERROR_ACTION(this); + } +} + +// Unlink the first chunk from a smallbin +void malloc_state::unlink_first_small_chunk(mchunkptr B, mchunkptr p, bindex_t I) { + mchunkptr F = p->_fd; + assert(p != B); + assert(p != F); + assert(p->chunksize() == small_index2size(I)); + if (B == F) { + clear_smallmap(I); + } + else if (rtcheck(ok_address(F) && F->_bk == p)) { + F->_bk = B; + B->_fd = F; + } + else { + CORRUPTION_ERROR_ACTION(this); + } +} + +// Replace dv node, binning the old one +// Used only when dvsize known to be small +void malloc_state::replace_dv(mchunkptr p, size_t s) { + size_t DVS = _dvsize; + assert(is_small(DVS)); + if (DVS != 0) { + mchunkptr DV = _dv; + insert_small_chunk(DV, DVS); + } + _dvsize = s; + _dv = p; +} + +/* ------------------------- Operations on trees ------------------------- */ + +// Insert chunk into tree +void malloc_state::insert_large_chunk(tchunkptr X, size_t s) { + tbinptr* H; + bindex_t I = compute_tree_index(s); + H = treebin_at(I); + X->_index = I; + 
X->_child[0] = X->_child[1] = 0; + if (!treemap_is_marked(I)) { + mark_treemap(I); + *H = X; + X->_parent = (tchunkptr)H; + X->_fd = X->_bk = X; + } + else { + tchunkptr T = *H; + size_t K = s << leftshift_for_tree_index(I); + for (;;) { + if (T->chunksize() != s) { + tchunkptr* C = &(T->_child[(K >> (SIZE_T_BITSIZE - 1)) & 1]); + K <<= 1; + if (*C != 0) + T = *C; + else if (rtcheck(ok_address(C))) { + *C = X; + X->_parent = T; + X->_fd = X->_bk = X; + break; + } + else { + CORRUPTION_ERROR_ACTION(this); + break; + } + } + else { + tchunkptr F = T->_fd; + if (rtcheck(ok_address(T) && ok_address(F))) { + T->_fd = F->_bk = X; + X->_fd = F; + X->_bk = T; + X->_parent = 0; + break; + } + else { + CORRUPTION_ERROR_ACTION(this); + break; + } + } + } + } +} + +/* + Unlink steps: + + 1. If x is a chained node, unlink it from its same-sized fd/bk links + and choose its bk node as its replacement. + 2. If x was the last node of its size, but not a leaf node, it must + be replaced with a leaf node (not merely one with an open left or + right), to make sure that lefts and rights of descendents + correspond properly to bit masks. We use the rightmost descendent + of x. We could use any other leaf, but this is easy to locate and + tends to counteract removal of leftmosts elsewhere, and so keeps + paths shorter than minimally guaranteed. This doesn't loop much + because on average a node in a tree is near the bottom. + 3. If x is the base of a chain (i.e., has parent links) relink + x's parent and children to x's replacement (or null if none). 
+ */ + +void malloc_state::unlink_large_chunk(tchunkptr X) { + tchunkptr XP = X->_parent; + tchunkptr R; + if (X->_bk != X) { + tchunkptr F = X->_fd; + R = X->_bk; + if (rtcheck(ok_address(F) && F->_bk == X && R->_fd == X)) { + F->_bk = R; + R->_fd = F; + } + else { + CORRUPTION_ERROR_ACTION(this); + } + } + else { + tchunkptr* RP; + if (((R = *(RP = &(X->_child[1]))) != 0) || + ((R = *(RP = &(X->_child[0]))) != 0)) { + tchunkptr* CP; + while ((*(CP = &(R->_child[1])) != 0) || + (*(CP = &(R->_child[0])) != 0)) { + R = *(RP = CP); + } + if (rtcheck(ok_address(RP))) + *RP = 0; + else { + CORRUPTION_ERROR_ACTION(this); + } + } + } + if (XP != 0) { + tbinptr* H = treebin_at(X->_index); + if (X == *H) { + if ((*H = R) == 0) + clear_treemap(X->_index); + } + else if (rtcheck(ok_address(XP))) { + if (XP->_child[0] == X) + XP->_child[0] = R; + else + XP->_child[1] = R; + } + else + CORRUPTION_ERROR_ACTION(this); + if (R != 0) { + if (rtcheck(ok_address(R))) { + tchunkptr C0, C1; + R->_parent = XP; + if ((C0 = X->_child[0]) != 0) { + if (rtcheck(ok_address(C0))) { + R->_child[0] = C0; + C0->_parent = R; + } + else + CORRUPTION_ERROR_ACTION(this); + } + if ((C1 = X->_child[1]) != 0) { + if (rtcheck(ok_address(C1))) { + R->_child[1] = C1; + C1->_parent = R; + } + else + CORRUPTION_ERROR_ACTION(this); + } + } + else + CORRUPTION_ERROR_ACTION(this); + } + } +} + +// Relays to large vs small bin operations + +void malloc_state::insert_chunk(mchunkptr p, size_t s) +{ + if (is_small(s)) + insert_small_chunk(p, s); + else + { + tchunkptr tp = (tchunkptr)(p); + insert_large_chunk(tp, s); + } +} + +void malloc_state::unlink_chunk(mchunkptr p, size_t s) +{ + if (is_small(s)) + unlink_small_chunk(p, s); + else + { + tchunkptr tp = (tchunkptr)(p); + unlink_large_chunk(tp); + } +} + + +// Relays to internal calls to malloc/free from realloc, memalign etc + +#if !ONLY_MSPACES && MSPACES +void *malloc_state::internal_malloc(size_t b) { + return ((this == gm)? 
dlmalloc(b) : this->_malloc(b)); +} +void malloc_state::internal_free(void *mem) { + if (this == gm) dlfree(mem); else this->_free((malloc_chunk*)mem); +} +#endif + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). + */ + +// Malloc using mmap +void* malloc_state::mmap_alloc(size_t nb) { + size_t mmsize = mmap_align(nb + 6 * sizeof(size_t) + CHUNK_ALIGN_MASK); + if (_footprint_limit != 0) { + size_t fp = _footprint + mmsize; + if (fp <= _footprint || fp > _footprint_limit) + return 0; + } + if (mmsize > nb) { + // Check for wrap around 0 + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->_prev_foot = offset; + p->_head = psize; + mark_inuse_foot(p, psize); + p->chunk_plus_offset(psize)->_head = FENCEPOST_HEAD; + p->chunk_plus_offset(psize+sizeof(size_t))->_head = 0; + + if (_least_addr == 0 || mm < _least_addr) + _least_addr = mm; + if ((_footprint += mmsize) > _max_footprint) + _max_footprint = _footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(p); + return chunk2mem(p); + } + } + return 0; +} + +// Realloc using mmap +mchunkptr malloc_state::mmap_resize(mchunkptr oldp, size_t nb, int flags) { + size_t oldsize = oldp->chunksize(); + (void)flags; // placate people compiling -Wunused + if (is_small(nb)) // Can't shrink mmap regions below small size + return 0; + + // Keep old chunk if big enough but not too big + if (oldsize >= nb + sizeof(size_t) && + (oldsize - nb) <= (mparams._granularity << 1)) + return oldp; + else { + // Only supported 
on linux + // size_t offset = oldp->_prev_foot; + // size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; + // size_t newmmsize = mmap_align(nb + 6 * sizeof(size_t) + CHUNK_ALIGN_MASK); + // + // char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + // oldmmsize, newmmsize, flags); + // if (cp != CMFAIL) { + // mchunkptr newp = (mchunkptr)(cp + offset); + // size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + // newp->_head = psize; + // mark_inuse_foot(newp, psize); + // newp->chunk_plus_offset(psize)->_head = FENCEPOST_HEAD; + // newp->chunk_plus_offset(psize+sizeof(size_t))->_head = 0; + // + // if (cp < _least_addr) + // _least_addr = cp; + // if ((_footprint += newmmsize - oldmmsize) > _max_footprint) + // _max_footprint = _footprint; + // check_mmapped_chunk(newp); + // return newp; + // } + } + return 0; +} + + +// -------------------------- mspace management -------------------------- + +// Initialize top chunk and its size +void malloc_state::init_top(mchunkptr p, size_t psize) { + // Ensure alignment + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + _top = p; + _topsize = psize; + p->_head = psize | PINUSE_BIT; + // set size of fake trailing chunk holding overhead space only once + p->chunk_plus_offset(psize)->_head = TOP_FOOT_SIZE; + _trim_check = mparams._trim_threshold; // reset on each update +} + +// Initialize bins for a new mstate that is otherwise zeroed out +void malloc_state::init_bins() { + // Establish circular links for smallbins + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(i); + bin->_fd = bin->_bk = bin; + } +} + +#if PROCEED_ON_ERROR + +// default corruption action +void malloc_state::reset_on_error() { + int i; + ++malloc_corruption_error_count; + // Reinitialize fields to forget about all memory + _smallmap = _treemap = 0; + _dvsize = _topsize = 0; + _seg._base = 0; + _seg._size = 0; + _seg._next = 0; + _top = _dv = 0; + for (i = 0; i < NTREEBINS; 
++i) + *treebin_at(i) = 0; + init_bins(); +} +#endif // PROCEED_ON_ERROR + +/* Allocate chunk and prepend remainder with chunk in successor base. */ +void* malloc_state::prepend_alloc(char* newbase, char* oldbase, size_t nb) { + mchunkptr p = align_as_chunk(newbase); + mchunkptr oldfirst = align_as_chunk(oldbase); + size_t psize = (char*)oldfirst - (char*)p; + mchunkptr q = (mchunkptr)p->chunk_plus_offset(nb); + size_t qsize = psize - nb; + set_size_and_pinuse_of_inuse_chunk(p, nb); + + assert((char*)oldfirst > (char*)q); + assert(oldfirst->pinuse()); + assert(qsize >= MIN_CHUNK_SIZE); + + // consolidate remainder with first chunk of old base + if (oldfirst == _top) { + size_t tsize = _topsize += qsize; + _top = q; + q->_head = tsize | PINUSE_BIT; + check_top_chunk(q); + } + else if (oldfirst == _dv) { + size_t dsize = _dvsize += qsize; + _dv = q; + q->set_size_and_pinuse_of_free_chunk(dsize); + } + else { + if (!oldfirst->is_inuse()) { + size_t nsize = oldfirst->chunksize(); + unlink_chunk(oldfirst, nsize); + oldfirst = (mchunkptr)oldfirst->chunk_plus_offset(nsize); + qsize += nsize; + } + q->set_free_with_pinuse(qsize, oldfirst); + insert_chunk(q, qsize); + check_free_chunk(q); + } + + check_malloced_chunk(chunk2mem(p), nb); + return chunk2mem(p); +} + +// Add a segment to hold a new noncontiguous region +void malloc_state::add_segment(char* tbase, size_t tsize, flag_t mmapped) { + // Determine locations and sizes of segment, fenceposts, old top + char* old_top = (char*)_top; + msegmentptr oldsp = segment_holding(old_top); + char* old_end = oldsp->_base + oldsp->_size; + size_t ssize = pad_request(sizeof(struct malloc_segment)); + char* rawsp = old_end - (ssize + 4 * sizeof(size_t) + CHUNK_ALIGN_MASK); + size_t offset = align_offset(chunk2mem(rawsp)); + char* asp = rawsp + offset; + char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; + mchunkptr sp = (mchunkptr)csp; + msegmentptr ss = (msegmentptr)(chunk2mem(sp)); + mchunkptr tnext = (mchunkptr)sp->chunk_plus_offset(ssize); + mchunkptr p = tnext; + int nfences = 0; + + // reset top to new space + init_top((mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + + // Set up segment record + assert(is_aligned(ss)); + set_size_and_pinuse_of_inuse_chunk(sp, ssize); + *ss = _seg; // Push current record + _seg._base = tbase; + _seg._size = tsize; + _seg._sflags = mmapped; + _seg._next = ss; + + // Insert trailing fenceposts + for (;;) { + mchunkptr nextp = (mchunkptr)p->chunk_plus_offset(sizeof(size_t)); + p->_head = FENCEPOST_HEAD; + ++nfences; + if ((char*)(&(nextp->_head)) < old_end) + p = nextp; + else + break; + } + assert(nfences >= 2); + + // Insert the rest of old top into a bin as an ordinary free chunk + if (csp != old_top) { + mchunkptr q = (mchunkptr)old_top; + size_t psize = csp - old_top; + mchunkptr tn = (mchunkptr)q->chunk_plus_offset(psize); + q->set_free_with_pinuse(psize, tn); + insert_chunk(q, psize); + } + + check_top_chunk(_top); +} + +/* -------------------------- System allocation -------------------------- */ + +// Get memory from system using MORECORE or MMAP +void* malloc_state::sys_alloc(size_t nb) { + char* tbase = CMFAIL; + size_t tsize = 0; + flag_t mmap_flag = 0; + size_t asize; // allocation size + + mparams.ensure_initialization(); + + // Directly map large chunks, but only if already initialized + if (use_mmap() && nb >= mparams._mmap_threshold && _topsize != 0) { + void* mem = mmap_alloc(nb); + if (mem != 0) + return mem; + } + + asize = mparams.granularity_align(nb + SYS_ALLOC_PADDING); + if (asize <= nb) + return 0; // wraparound + if (_footprint_limit != 0) { + size_t fp = _footprint + asize; + if (fp <= _footprint || fp > _footprint_limit) + return 0; + } + + /* + Try getting memory in any of three ways (in most-preferred to + least-preferred order): + 1. 
A call to MORECORE that can normally contiguously extend memory. + (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or + or main space is mmapped or a previous contiguous call failed) + 2. A call to MMAP new space (disabled if not HAVE_MMAP). + Note that under the default settings, if MORECORE is unable to + fulfill a request, and HAVE_MMAP is true, then mmap is + used as a noncontiguous system allocator. This is a useful backup + strategy for systems with holes in address spaces -- in this case + sbrk cannot contiguously expand the heap, but mmap may be able to + find space. + 3. A call to MORECORE that cannot usually contiguously extend memory. + (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. + */ + + // if (MORECORE_CONTIGUOUS && !use_noncontiguous()) { + // char* br = CMFAIL; + // size_t ssize = asize; // sbrk call size + // msegmentptr ss = (_top == 0)? 0 : segment_holding((char*)_top); + // ACQUIRE_MALLOC_GLOBAL_LOCK(); + // + // if (ss == 0) { + // // First time through or recovery + // char* base = (char*)CALL_MORECORE(0); + // if (base != CMFAIL) { + // size_t fp; + // // Adjust to end on a page boundary + // if (!mparams.is_page_aligned(base)) + // ssize += (mparams.page_align((size_t)base) - (size_t)base); + // fp = _footprint + ssize; // recheck limits + // if (ssize > nb && ssize < HALF_MAX_SIZE_T && + // (_footprint_limit == 0 || + // (fp > _footprint && fp <= _footprint_limit)) && + // (br = (char*)(CALL_MORECORE(ssize))) == base) { + // tbase = base; + // tsize = ssize; + // } + // } + // } + // else { + // // Subtract out existing available top space from MORECORE request. 
+ // ssize = mparams.granularity_align(nb - _topsize + SYS_ALLOC_PADDING); + // // Use mem here only if it did continuously extend old space + // if (ssize < HALF_MAX_SIZE_T && + // (br = (char*)(CALL_MORECORE(ssize))) == ss->_base+ss->_size) { + // tbase = br; + // tsize = ssize; + // } + // } + // + // if (tbase == CMFAIL) { + // // Cope with partial failure + // if (br != CMFAIL) { + // // Try to use/extend the space we did get + // if (ssize < HALF_MAX_SIZE_T && + // ssize < nb + SYS_ALLOC_PADDING) { + // size_t esize = mparams.granularity_align(nb + SYS_ALLOC_PADDING - ssize); + // if (esize < HALF_MAX_SIZE_T) { + // char* end = (char*)CALL_MORECORE(esize); + // if (end != CMFAIL) + // ssize += esize; + // else { + // // Can't use; try to release + // (void) CALL_MORECORE(-ssize); + // br = CMFAIL; + // } + // } + // } + // } + // if (br != CMFAIL) { + // // Use the space we did get + // tbase = br; + // tsize = ssize; + // } + // else + // disable_contiguous(); // Don't try contiguous path in the future + // } + // + // RELEASE_MALLOC_GLOBAL_LOCK(); + // } + + if (HAVE_MMAP && tbase == CMFAIL) { + // Try MMAP + char* mp = (char*)(CALL_MMAP(asize)); + if (mp != CMFAIL) { + tbase = mp; + tsize = asize; + mmap_flag = USE_MMAP_BIT; + } + } + + // if (HAVE_MORECORE && tbase == CMFAIL) { + // // Try noncontiguous MORECORE + // if (asize < HALF_MAX_SIZE_T) { + // char* br = CMFAIL; + // char* end = CMFAIL; + // ACQUIRE_MALLOC_GLOBAL_LOCK(); + // br = (char*)(CALL_MORECORE(asize)); + // end = (char*)(CALL_MORECORE(0)); + // RELEASE_MALLOC_GLOBAL_LOCK(); + // if (br != CMFAIL && end != CMFAIL && br < end) { + // size_t ssize = end - br; + // if (ssize > nb + TOP_FOOT_SIZE) { + // tbase = br; + // tsize = ssize; + // } + // } + // } + // } + + if (tbase != CMFAIL) { + + if ((_footprint += tsize) > _max_footprint) + _max_footprint = _footprint; + + if (!is_initialized()) { + // first-time initialization + if (_least_addr == 0 || tbase < _least_addr) + _least_addr = 
tbase; + _seg._base = tbase; + _seg._size = tsize; + _seg._sflags = mmap_flag; + _magic = mparams._magic; + _release_checks = MAX_RELEASE_CHECK_RATE; + init_bins(); +#if !ONLY_MSPACES + if (is_global(this)) + init_top((mchunkptr)tbase, tsize - TOP_FOOT_SIZE); + else +#endif + { + // Offset top by embedded malloc_state + mchunkptr mn = (mchunkptr)mem2chunk(this)->next_chunk(); + init_top(mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); + } + } + + else { + // Try to merge with an existing segment + msegmentptr sp = &_seg; + // Only consider most recent segment if traversal suppressed + while (sp != 0 && tbase != sp->_base + sp->_size) + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->_next; + if (sp != 0 && + !sp->is_extern_segment() && + (sp->_sflags & USE_MMAP_BIT) == mmap_flag && + segment_holds(sp, _top)) { + // append + sp->_size += tsize; + init_top(_top, _topsize + tsize); + } + else { + if (tbase < _least_addr) + _least_addr = tbase; + sp = &_seg; + while (sp != 0 && sp->_base != tbase + tsize) + sp = (NO_SEGMENT_TRAVERSAL) ? 
0 : sp->_next; + if (sp != 0 && + !sp->is_extern_segment() && + (sp->_sflags & USE_MMAP_BIT) == mmap_flag) { + char* oldbase = sp->_base; + sp->_base = tbase; + sp->_size += tsize; + return prepend_alloc(tbase, oldbase, nb); + } + else + add_segment(tbase, tsize, mmap_flag); + } + } + + if (nb < _topsize) { + // Allocate from new or extended top space + size_t rsize = _topsize -= nb; + mchunkptr p = _top; + mchunkptr r = _top = (mchunkptr)p->chunk_plus_offset(nb); + r->_head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(p, nb); + check_top_chunk(_top); + check_malloced_chunk(chunk2mem(p), nb); + return chunk2mem(p); + } + } + + MALLOC_FAILURE_ACTION; + return 0; +} + +/* ----------------------- system deallocation -------------------------- */ + +// Unmap and unlink any mmapped segments that don't contain used chunks +size_t malloc_state::release_unused_segments() { + size_t released = 0; + int nsegs = 0; + msegmentptr pred = &_seg; + msegmentptr sp = pred->_next; + while (sp != 0) { + char* base = sp->_base; + size_t size = sp->_size; + msegmentptr next = sp->_next; + ++nsegs; + if (sp->is_mmapped_segment() && !sp->is_extern_segment()) { + mchunkptr p = align_as_chunk(base); + size_t psize = p->chunksize(); + // Can unmap if first chunk holds entire segment and not pinned + if (!p->is_inuse() && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + tchunkptr tp = (tchunkptr)p; + assert(segment_holds(sp, p)); + if (p == _dv) { + _dv = 0; + _dvsize = 0; + } + else { + unlink_large_chunk(tp); + } + if (CALL_MUNMAP(base, size) == 0) { + released += size; + _footprint -= size; + // unlink obsoleted record + sp = pred; + sp->_next = next; + } + else { + // back out if cannot unmap + insert_large_chunk(tp, psize); + } + } + } + if (NO_SEGMENT_TRAVERSAL) // scan only first segment + break; + pred = sp; + sp = next; + } + // Reset check counter + _release_checks = (((size_t) nsegs > (size_t) MAX_RELEASE_CHECK_RATE)? 
+ (size_t) nsegs : (size_t) MAX_RELEASE_CHECK_RATE); + return released; +} + +int malloc_state::sys_trim(size_t pad) { + size_t released = 0; + mparams.ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized()) { + pad += TOP_FOOT_SIZE; // ensure enough room for segment overhead + + if (_topsize > pad) { + // Shrink top space in _granularity - size units, keeping at least one + size_t unit = mparams._granularity; + size_t extra = ((_topsize - pad + (unit - 1)) / unit - + 1) * unit; + msegmentptr sp = segment_holding((char*)_top); + + if (!sp->is_extern_segment()) { + if (sp->is_mmapped_segment()) { + if (HAVE_MMAP && + sp->_size >= extra && + !has_segment_link(sp)) { + // can't shrink if pinned + size_t newsize = sp->_size - extra; + (void)newsize; // placate people compiling -Wunused-variable + // Prefer mremap, fall back to munmap + if (// (CALL_MREMAP(sp->_base, sp->_size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->_base + newsize, extra) == 0)) { + released = extra; + } + } + } + // else if (HAVE_MORECORE) { + // if (extra >= HALF_MAX_SIZE_T) // Avoid wrapping negative + // extra = (HALF_MAX_SIZE_T) + 1 - unit; + // ACQUIRE_MALLOC_GLOBAL_LOCK(); + // { + // // Make sure end of memory is where we last set it. 
+ // char* old_br = (char*)(CALL_MORECORE(0)); + // if (old_br == sp->_base + sp->_size) { + // char* rel_br = (char*)(CALL_MORECORE(-extra)); + // char* new_br = (char*)(CALL_MORECORE(0)); + // if (rel_br != CMFAIL && new_br < old_br) + // released = old_br - new_br; + // } + // } + // RELEASE_MALLOC_GLOBAL_LOCK(); + // } + } + + if (released != 0) { + sp->_size -= released; + _footprint -= released; + init_top(_top, _topsize - released); + check_top_chunk(_top); + } + } + + // Unmap any unused mmapped segments + if (HAVE_MMAP) + released += release_unused_segments(); + + // On failure, disable autotrim to avoid repeated failed future calls + if (released == 0 && _topsize > _trim_check) + _trim_check = MAX_SIZE_T; + } + + return (released != 0)? 1 : 0; +} + +/* Consolidate and bin a chunk. Differs from exported versions + of free mainly in that the chunk need not be marked as inuse. + */ +void malloc_state::dispose_chunk(mchunkptr p, size_t psize) { + mchunkptr next = (mchunkptr)p->chunk_plus_offset(psize); + if (!p->pinuse()) { + mchunkptr prev; + size_t prevsize = p->_prev_foot; + if (p->is_mmapped()) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + _footprint -= psize; + return; + } + prev = (mchunkptr)p->chunk_minus_offset(prevsize); + psize += prevsize; + p = prev; + if (rtcheck(ok_address(prev))) { + // consolidate backward + if (p != _dv) { + unlink_chunk(p, prevsize); + } + else if ((next->_head & INUSE_BITS) == INUSE_BITS) { + _dvsize = psize; + p->set_free_with_pinuse(psize, next); + return; + } + } + else { + CORRUPTION_ERROR_ACTION(this); + return; + } + } + if (rtcheck(ok_address(next))) { + if (!next->cinuse()) { + // consolidate forward + if (next == _top) { + size_t tsize = _topsize += psize; + _top = p; + p->_head = tsize | PINUSE_BIT; + if (p == _dv) { + _dv = 0; + _dvsize = 0; + } + return; + } + else if (next == _dv) { + size_t dsize = _dvsize += psize; + _dv = p; + 
p->set_size_and_pinuse_of_free_chunk(dsize); + return; + } + else { + size_t nsize = next->chunksize(); + psize += nsize; + unlink_chunk(next, nsize); + p->set_size_and_pinuse_of_free_chunk(psize); + if (p == _dv) { + _dvsize = psize; + return; + } + } + } + else { + p->set_free_with_pinuse(psize, next); + } + insert_chunk(p, psize); + } + else { + CORRUPTION_ERROR_ACTION(this); + } +} + +/* ---------------------------- malloc --------------------------- */ + +// allocate a large request from the best fitting chunk in a treebin +void* malloc_state::tmalloc_large(size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; // Unsigned negation + tchunkptr t; + bindex_t idx = compute_tree_index(nb); + if ((t = *treebin_at(idx)) != 0) { + // Traverse tree for this bin looking for node with size == nb + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; // The deepest untaken right subtree + for (;;) { + tchunkptr rt; + size_t trem = t->chunksize() - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->_child[1]; + t = t->_child[(sizebits >> (SIZE_T_BITSIZE - 1)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; // set t to least subtree holding sizes > nb + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { + // set t to root of next non-empty treebin + binmap_t leftbits = left_bits(idx2bit(idx)) & _treemap; + if (leftbits != 0) { + binmap_t leastbit = least_bit(leftbits); + bindex_t i = compute_bit2idx(leastbit); + t = *treebin_at(i); + } + } + + while (t != 0) { + // find smallest of tree or subtree + size_t trem = t->chunksize() - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = t->leftmost_child(); + } + + // If dv is a better fit, return 0 so malloc will use it + if (v != 0 && rsize < (size_t)(_dvsize - nb)) { + if (rtcheck(ok_address(v))) { + // split + mchunkptr r = (mchunkptr)v->chunk_plus_offset(nb); + assert(v->chunksize() == rsize + nb); + if (rtcheck(ok_next(v, 
r))) { + unlink_large_chunk(v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(v, nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + insert_chunk(r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(this); + } + return 0; +} + +// allocate a small request from the best fitting chunk in a treebin +void* malloc_state::tmalloc_small(size_t nb) { + tchunkptr t, v; + size_t rsize; + binmap_t leastbit = least_bit(_treemap); + bindex_t i = compute_bit2idx(leastbit); + v = t = *treebin_at(i); + rsize = t->chunksize() - nb; + + while ((t = t->leftmost_child()) != 0) { + size_t trem = t->chunksize() - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (rtcheck(ok_address(v))) { + mchunkptr r = (mchunkptr)v->chunk_plus_offset(nb); + assert(v->chunksize() == rsize + nb); + if (rtcheck(ok_next(v, r))) { + unlink_large_chunk(v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(v, nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + replace_dv(r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(this); + return 0; +} + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) DLTHROW { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. + (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. 
Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + mparams.ensure_initialization(); // initialize in sys_alloc if not using locks +#endif + return gm->_malloc(bytes); +} + +void dlfree(void* mem) DLTHROW { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. + */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!fm->ok_magic()) { + USAGE_ERROR_ACTION(fm, p); + return; + } + fm->_free(p); +#else // FOOTERS + gm->_free(p); +#endif // FOOTERS + } +} + +void* dlcalloc(size_t n_elements, size_t elem_size) DLTHROW { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; // force downstream failure on overflow + } + mem = dlmalloc(req); + if (mem != 0 && mem2chunk(mem)->calloc_must_clear()) + memset(mem, 0, req); + return mem; +} + +#endif // !ONLY_MSPACES + +/* ---------------------------- malloc --------------------------- */ + +void* malloc_state::_malloc(size_t bytes) { + { + PREACTION(this); + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = _smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { + // Remainderless fit to a smallbin. 
+ mchunkptr b, p; + idx += ~smallbits & 1; // Uses next bin if idx empty + b = smallbin_at(idx); + p = b->_fd; + assert(p->chunksize() == small_index2size(idx)); + unlink_first_small_chunk(b, p, idx); + set_inuse_and_pinuse(p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (nb > _dvsize) { + if (smallbits != 0) { + // Use chunk in next nonempty smallbin + mchunkptr b, p, r; + size_t rsize; + binmap_t leftbits = (smallbits << idx) & left_bits(malloc_state::idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + bindex_t i = compute_bit2idx(leastbit); + b = smallbin_at(i); + p = b->_fd; + assert(p->chunksize() == small_index2size(i)); + unlink_first_small_chunk(b, p, i); + rsize = small_index2size(i) - nb; + // Fit here cannot be remainderless if 4byte sizes + if (sizeof(size_t) != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(p, nb); + r = (mchunkptr)p->chunk_plus_offset(nb); + r->set_size_and_pinuse_of_free_chunk(rsize); + replace_dv(r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (_treemap != 0 && (mem = tmalloc_small(nb)) != 0) { + check_malloced_chunk(mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; // Too big to allocate. 
Force failure (in sys alloc) + else { + nb = pad_request(bytes); + if (_treemap != 0 && (mem = tmalloc_large(nb)) != 0) { + check_malloced_chunk(mem, nb); + goto postaction; + } + } + + if (nb <= _dvsize) { + size_t rsize = _dvsize - nb; + mchunkptr p = _dv; + if (rsize >= MIN_CHUNK_SIZE) { + // split dv + mchunkptr r = _dv = (mchunkptr)p->chunk_plus_offset(nb); + _dvsize = rsize; + r->set_size_and_pinuse_of_free_chunk(rsize); + set_size_and_pinuse_of_inuse_chunk(p, nb); + } + else { // exhaust dv + size_t dvs = _dvsize; + _dvsize = 0; + _dv = 0; + set_inuse_and_pinuse(p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(mem, nb); + goto postaction; + } + + else if (nb < _topsize) { + // Split top + size_t rsize = _topsize -= nb; + mchunkptr p = _top; + mchunkptr r = _top = (mchunkptr)p->chunk_plus_offset(nb); + r->_head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(p, nb); + mem = chunk2mem(p); + check_top_chunk(_top); + check_malloced_chunk(mem, nb); + goto postaction; + } + + mem = sys_alloc(nb); + + postaction: + POSTACTION(this); + return mem; + } + + return 0; +} + +/* ---------------------------- free --------------------------- */ + +void malloc_state::_free(mchunkptr p) { + { + PREACTION(this); + check_inuse_chunk(p); + if (rtcheck(ok_address(p) && ok_inuse(p))) { + size_t psize = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(psize); + if (!p->pinuse()) { + size_t prevsize = p->_prev_foot; + if (p->is_mmapped()) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + _footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = (mchunkptr)p->chunk_minus_offset(prevsize); + psize += prevsize; + p = prev; + if (rtcheck(ok_address(prev))) { + // consolidate backward + if (p != _dv) { + unlink_chunk(p, prevsize); + } + else if ((next->_head & INUSE_BITS) == INUSE_BITS) { + _dvsize = psize; + p->set_free_with_pinuse(psize, next); + goto postaction; + } + } + else + goto 
erroraction; + } + } + + if (rtcheck(ok_next(p, next) && ok_pinuse(next))) { + if (!next->cinuse()) { + // consolidate forward + if (next == _top) { + size_t tsize = _topsize += psize; + _top = p; + p->_head = tsize | PINUSE_BIT; + if (p == _dv) { + _dv = 0; + _dvsize = 0; + } + if (should_trim(tsize)) + sys_trim(0); + goto postaction; + } + else if (next == _dv) { + size_t dsize = _dvsize += psize; + _dv = p; + p->set_size_and_pinuse_of_free_chunk(dsize); + goto postaction; + } + else { + size_t nsize = next->chunksize(); + psize += nsize; + unlink_chunk(next, nsize); + p->set_size_and_pinuse_of_free_chunk(psize); + if (p == _dv) { + _dvsize = psize; + goto postaction; + } + } + } + else + p->set_free_with_pinuse(psize, next); + + if (is_small(psize)) { + insert_small_chunk(p, psize); + check_free_chunk(p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(tp, psize); + check_free_chunk(p); + if (--_release_checks == 0) + release_unused_segments(); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(this, p); + postaction: + POSTACTION(this); + } +} + +/* ------------ Internal support for realloc, memalign, etc -------------- */ + +// Try to realloc; only in-place unless can_move true +mchunkptr malloc_state::try_realloc_chunk(mchunkptr p, size_t nb, int can_move) { + mchunkptr newp = 0; + size_t oldsize = p->chunksize(); + mchunkptr next = (mchunkptr)p->chunk_plus_offset(oldsize); + if (rtcheck(ok_address(p) && ok_inuse(p) && + ok_next(p, next) && ok_pinuse(next))) { + if (p->is_mmapped()) { + newp = mmap_resize(p, nb, can_move); + } + else if (oldsize >= nb) { + // already big enough + size_t rsize = oldsize - nb; + if (rsize >= MIN_CHUNK_SIZE) { + // split off remainder + mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + set_inuse(r, rsize); + dispose_chunk(r, rsize); + } + newp = p; + } + else if (next == _top) { + // extend into top + if (oldsize + _topsize > nb) { + size_t newsize = oldsize + _topsize; + 
+                size_t newtopsize = newsize - nb;
+                mchunkptr newtop = (mchunkptr)p->chunk_plus_offset(nb);
+                set_inuse(p, nb);
+                newtop->_head = newtopsize | PINUSE_BIT;
+                _top = newtop;
+                _topsize = newtopsize;
+                newp = p;
+            }
+        }
+        else if (next == _dv) {
+            // extend into dv
+            size_t dvs = _dvsize;
+            if (oldsize + dvs >= nb) {
+                size_t dsize = oldsize + dvs - nb;
+                if (dsize >= MIN_CHUNK_SIZE) {
+                    // enough is left over to keep a smaller dv right after p
+                    mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb);
+                    mchunkptr n = (mchunkptr)r->chunk_plus_offset(dsize);
+                    set_inuse(p, nb);
+                    r->set_size_and_pinuse_of_free_chunk(dsize);
+                    n->clear_pinuse();
+                    _dvsize = dsize;
+                    _dv = r;
+                }
+                else {
+                    // exhaust dv
+                    size_t newsize = oldsize + dvs;
+                    set_inuse(p, newsize);
+                    _dvsize = 0;
+                    _dv = 0;
+                }
+                newp = p;
+            }
+        }
+        else if (!next->cinuse()) {
+            // extend into next free chunk
+            size_t nextsize = next->chunksize();
+            if (oldsize + nextsize >= nb) {
+                size_t rsize = oldsize + nextsize - nb;
+                unlink_chunk(next, nextsize);
+                if (rsize < MIN_CHUNK_SIZE) {
+                    // remainder too small to stand alone; absorb it into p
+                    size_t newsize = oldsize + nextsize;
+                    set_inuse(p, newsize);
+                }
+                else {
+                    mchunkptr r = (mchunkptr)p->chunk_plus_offset(nb);
+                    set_inuse(p, nb);
+                    set_inuse(r, rsize);
+                    dispose_chunk(r, rsize);
+                }
+                newp = p;
+            }
+        }
+    }
+    else {
+        // report against this state, as the other member functions do
+        USAGE_ERROR_ACTION(this, chunk2mem(p));
+    }
+    return newp;
+}
+
+// Allocates at least `bytes` bytes at an address that is a multiple of
+// `alignment`. The alignment is raised to at least MIN_CHUNK_SIZE and rounded
+// up to a power of two. Fails through MALLOC_FAILURE_ACTION (returning 0) when
+// bytes >= MAX_REQUEST - alignment.
+void* malloc_state::internal_memalign(size_t alignment, size_t bytes) {
+    void* mem = 0;
+    if (alignment < MIN_CHUNK_SIZE) // must be at least a minimum chunk size
+        alignment = MIN_CHUNK_SIZE;
+    if ((alignment & (alignment - 1)) != 0) {
+        // Ensure a power of 2
+        size_t a = MALLOC_ALIGNMENT << 1;
+        while (a < alignment) a <<= 1;
+        alignment = a;
+    }
+    if (bytes >= MAX_REQUEST - alignment) {
+        MALLOC_FAILURE_ACTION;
+    }
+    else {
+        size_t nb = request2size(bytes);
+        // over-allocate so that an aligned position with a releasable leader exists
+        size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
+        mem = internal_malloc(req);
+        if (mem != 0) {
+            mchunkptr p = mem2chunk(mem);
+            PREACTION(this);
+            if ((((size_t)(mem)) & (alignment - 1)) != 0) {
+                // misaligned
+                /*
+
Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((void *)(((size_t)((char*)mem + alignment - 1)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? + br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = p->chunksize() - leadsize; + + if (p->is_mmapped()) { + // For mmapped chunks, just adjust offset + newp->_prev_foot = p->_prev_foot + leadsize; + newp->_head = newsize; + } + else { + // Otherwise, give back leader, use the rest + set_inuse(newp, newsize); + set_inuse(p, leadsize); + dispose_chunk(p, leadsize); + } + p = newp; + } + + // Give back spare room at the end + if (!p->is_mmapped()) { + size_t size = p->chunksize(); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = (mchunkptr)p->chunk_plus_offset(nb); + set_inuse(p, nb); + set_inuse(remainder, remainder_size); + dispose_chunk(remainder, remainder_size); + } + } + + mem = chunk2mem(p); + assert (p->chunksize() >= nb); + assert(((size_t)mem & (alignment - 1)) == 0); + check_inuse_chunk(p); + POSTACTION(this); + } + } + return mem; +} + +/* + Common support for independent_X routines, handling + all of the combinations that can result. 
+ The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed + */ +void** malloc_state::ialloc(size_t n_elements, size_t* sizes, int opts, + void* chunks[]) { + + size_t element_size; // chunksize of each element, if all same + size_t contents_size; // total size of elements + size_t array_size; // request size of pointer array + void* mem; // malloced aggregate space + mchunkptr p; // corresponding chunk + size_t remainder_size; // remaining bytes while splitting + void** marray; // either "chunks" or malloced ptr array + mchunkptr array_chunk; // chunk for malloced ptr array + flag_t was_enabled; // to disable mmap + size_t size; + size_t i; + + mparams.ensure_initialization(); + // compute array length, if needed + if (chunks != 0) { + if (n_elements == 0) + return chunks; // nothing to do + marray = chunks; + array_size = 0; + } + else { + // if empty req, must still return chunk representing empty array + if (n_elements == 0) + return (void**)internal_malloc(0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + // compute total element size + if (opts & 0x1) { + // all-same-size + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { + // add up all the sizes + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+ */ + was_enabled = use_mmap(); + disable_mmap(); + mem = internal_malloc(size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(); + if (mem == 0) + return 0; + + PREACTION(this); + p = mem2chunk(mem); + remainder_size = p->chunksize(); + + assert(!p->is_mmapped()); + + if (opts & 0x2) { + // optionally clear the elements + memset((size_t*)mem, 0, remainder_size - sizeof(size_t) - array_size); + } + + // If not provided, allocate the pointer array as final part of chunk + if (marray == 0) { + size_t array_chunk_size; + array_chunk = (mchunkptr)p->chunk_plus_offset(contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + // split out elements + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements - 1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(p, size); + p = (mchunkptr)p->chunk_plus_offset(size); + } + else { + // the final element absorbs any overallocation slop + set_size_and_pinuse_of_inuse_chunk(p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + // final element must have exactly exhausted chunk + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(mem2chunk(marray[i])); + +#endif + + POSTACTION(this); + return marray; +} + +/* Try to free all pointers in the given array. + Note: this could be made faster, by delaying consolidation, + at the price of disabling some user integrity checks, We + still optimize some consolidations by combining adjacent + chunks before freeing, which will occur often if allocated + with ialloc or the array is sorted. 
+ */
+// Returns the number of entries left unfreed. With FOOTERS enabled, chunks
+// owned by a different malloc_state are skipped and counted; each entry that
+// is handled is overwritten with 0.
+size_t malloc_state::internal_bulk_free(void* array[], size_t nelem) {
+    size_t unfreed = 0;
+    {
+        PREACTION(this);
+        void** a;
+        void** fence = &(array[nelem]);
+        for (a = array; a != fence; ++a) {
+            void* mem = *a;
+            if (mem != 0) {
+                mchunkptr p = mem2chunk(mem);
+                size_t psize = p->chunksize();
+#if FOOTERS
+                // only free chunks that belong to this state
+                if (get_mstate_for(p) != this) {
+                    ++unfreed;
+                    continue;
+                }
+#endif
+                check_inuse_chunk(p);
+                *a = 0;
+                if (rtcheck(ok_address(p) && ok_inuse(p))) {
+                    void ** b = a + 1; // try to merge with next chunk
+                    mchunkptr next = (mchunkptr)p->next_chunk();
+                    if (b != fence && *b == chunk2mem(next)) {
+                        // fold p into the next array entry so both are freed as one chunk
+                        size_t newsize = next->chunksize() + psize;
+                        set_inuse(p, newsize);
+                        *b = chunk2mem(p);
+                    }
+                    else
+                        dispose_chunk(p, psize);
+                }
+                else {
+                    CORRUPTION_ERROR_ACTION(this);
+                    break;
+                }
+            }
+        }
+        if (should_trim(_topsize))
+            sys_trim(0);
+        POSTACTION(this);
+    }
+    return unfreed;
+}
+
+// Sets this state up to manage the single segment [tbase, tbase + tsize).
+// mspace::create()/create_with_base() allocate the malloc_state on the system
+// heap rather than inside the segment, so the top chunk starts at tbase; it
+// must not be derived from a chunk header in front of `this`.
+void malloc_state::init(char* tbase, size_t tsize) {
+    _seg._base = _least_addr = tbase;
+    _seg._size = _footprint = _max_footprint = tsize;
+    _magic = mparams._magic;
+    _release_checks = MAX_RELEASE_CHECK_RATE;
+    _mflags = mparams._default_mflags;
+    _extp = 0;
+    _exts = 0;
+    disable_contiguous();
+    init_bins();
+    // NOTE(review): assumes init_top() aligns the chunk start itself, as
+    // upstream dlmalloc's init_top does -- confirm against its definition.
+    init_top((mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+    check_top_chunk(_top);
+}
+
+/* Traversal */
+#if MALLOC_INSPECT_ALL
+// Walks every chunk of every segment and reports it to `handler`; free chunks
+// are reported with used_bytes == 0 and with their bookkeeping header skipped.
+// NOTE(review): segment_holds/align_as_chunk are called in their C-style form;
+// confirm they still exist with these signatures before enabling this block.
+void malloc_state::internal_inspect_all(void(*handler)(void *start, void *end,
+                                                      size_t used_bytes,
+                                                      void* callback_arg),
+                                        void* arg) {
+    if (is_initialized()) {
+        mchunkptr top = _top;
+        msegmentptr s;
+        for (s = &_seg; s != 0; s = s->_next) {
+            mchunkptr q = align_as_chunk(s->_base);
+            while (segment_holds(s, q) && q->_head != FENCEPOST_HEAD) {
+                mchunkptr next = (mchunkptr)q->next_chunk();
+                size_t sz = q->chunksize();
+                size_t used;
+                void* start;
+                if (q->is_inuse()) {
+                    used = sz - CHUNK_OVERHEAD; // must not be mmapped
+                    start = chunk2mem(q);
+                }
+                else {
+                    used = 0;
+                    if (is_small(sz)) {
+                        // offset by possible bookkeeping
+                        start = (void*)((char*)q + sizeof(struct malloc_chunk));
+                    }
+                    else {
+                        start = (void*)((char*)q + sizeof(struct malloc_tree_chunk));
+                    }
+                }
+                if (start < (void*)next) // skip if all space is bookkeeping
+                    handler(start, next, used, arg);
+                if (q == top)
+                    break;
+                q = next;
+            }
+        }
+    }
+}
+#endif // MALLOC_INSPECT_ALL
+
+/* ------------------ Exported realloc, memalign, etc -------------------- */
+
+#if !ONLY_MSPACES
+
+// Resizes `oldmem` to `bytes`. Tries to resize the chunk first (moving is
+// allowed); otherwise allocates a new block, copies min(old, new) bytes and
+// frees the old block. Returns 0 on failure, leaving `oldmem` untouched.
+void* dlrealloc(void* oldmem, size_t bytes) DLTHROW {
+    void* mem = 0;
+    if (oldmem == 0) {
+        mem = dlmalloc(bytes);
+    }
+    else if (bytes >= MAX_REQUEST) {
+        MALLOC_FAILURE_ACTION;
+    }
+#ifdef REALLOC_ZERO_BYTES_FREES
+    else if (bytes == 0) {
+        dlfree(oldmem);
+    }
+#endif // REALLOC_ZERO_BYTES_FREES
+    else {
+        size_t nb = request2size(bytes);
+        mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+        mstate m = gm;
+#else
+        mstate m = get_mstate_for(oldp);
+        if (!m->ok_magic()) {
+            USAGE_ERROR_ACTION(m, oldmem);
+            return 0;
+        }
+#endif
+        {
+            PREACTION(m);
+            mchunkptr newp = m->try_realloc_chunk(oldp, nb, 1);
+            POSTACTION(m);
+            if (newp != 0) {
+                m->check_inuse_chunk(newp);
+                mem = chunk2mem(newp);
+            }
+            else {
+                mem = m->internal_malloc(bytes);
+                if (mem != 0) {
+                    // copy only the payload that both blocks can hold
+                    size_t oc = oldp->chunksize() - oldp->overhead_for();
+                    memcpy(mem, oldmem, (oc < bytes)? oc : bytes);
+                    m->internal_free(oldmem);
+                }
+            }
+        }
+    }
+    return mem;
+}
+
+// Like dlrealloc, but never moves the block: returns `oldmem` when the chunk
+// could be resized where it is, otherwise 0.
+void* dlrealloc_in_place(void* oldmem, size_t bytes) {
+    void* mem = 0;
+    if (oldmem != 0) {
+        if (bytes >= MAX_REQUEST) {
+            MALLOC_FAILURE_ACTION;
+        }
+        else {
+            size_t nb = request2size(bytes);
+            mchunkptr oldp = mem2chunk(oldmem);
+#if !
FOOTERS + mstate m = gm; +#else + mstate m = get_mstate_for(oldp); + if (!m->ok_magic()) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif + { + PREACTION(m); + mchunkptr newp = m->try_realloc_chunk(oldp, nb, 0); + POSTACTION(m); + if (newp == oldp) { + m->check_inuse_chunk(newp); + mem = oldmem; + } + } + } + } + return mem; +} + +void* dlmemalign(size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) { + return dlmalloc(bytes); + } + return gm->internal_memalign(alignment, bytes); +} + +int dlposix_memalign(void** pp, size_t alignment, size_t bytes) DLTHROW { + void* mem = 0; + if (alignment == MALLOC_ALIGNMENT) + mem = dlmalloc(bytes); + else { + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + if (r != 0 || d == 0 || (d & (d - 1)) != 0) + return EINVAL; + else if (bytes <= MAX_REQUEST - alignment) { + if (alignment < MIN_CHUNK_SIZE) + alignment = MIN_CHUNK_SIZE; + mem = gm->internal_memalign(alignment, bytes); + } + } + if (mem == 0) + return ENOMEM; + else { + *pp = mem; + return 0; + } +} + +void* dlvalloc(size_t bytes) DLTHROW { + size_t pagesz; + mparams.ensure_initialization(); + pagesz = mparams._page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + mparams.ensure_initialization(); + pagesz = mparams._page_size; + return dlmemalign(pagesz, (bytes + pagesz - 1) & ~(pagesz - 1)); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; // serves as 1-element array + return gm->ialloc(n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return gm->ialloc(n_elements, sizes, 0, chunks); +} + +size_t dlbulk_free(void* array[], size_t nelem) { + return gm->internal_bulk_free(array, nelem); +} + +#if MALLOC_INSPECT_ALL +void dlmalloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + 
+                          void* arg) {
+    mparams.ensure_initialization();
+    {
+        PREACTION(gm);
+        // internal_inspect_all is a malloc_state member taking (handler, arg)
+        gm->internal_inspect_all(handler, arg);
+        POSTACTION(gm);
+    }
+}
+#endif
+
+// Asks the global state to release unused memory back to the system, keeping
+// `pad` bytes of slack. Returns 1 if any memory was released, else 0.
+int dlmalloc_trim(size_t pad) {
+    int result = 0;
+    mparams.ensure_initialization();
+    {
+        PREACTION(gm);
+        result = gm->sys_trim(pad);
+        POSTACTION(gm);
+    }
+    return result;
+}
+
+size_t dlmalloc_footprint(void) {
+    return gm->_footprint;
+}
+
+size_t dlmalloc_max_footprint(void) {
+    return gm->_max_footprint;
+}
+
+// A stored limit of 0 means "no limit" and is reported as MAX_SIZE_T.
+size_t dlmalloc_footprint_limit(void) {
+    size_t maf = gm->_footprint_limit;
+    return maf == 0 ? MAX_SIZE_T : maf;
+}
+
+// Sets the footprint limit and returns the value actually stored.
+//   bytes == 0          -> the smallest enforceable limit (one granularity unit)
+//   bytes == MAX_SIZE_T -> 0, i.e. the limit is disabled
+//   otherwise           -> bytes rounded by granularity_align
+// The three cases must be mutually exclusive: without the `else`, bytes == 0
+// fell through to granularity_align(0) and lost the minimal-size result.
+size_t dlmalloc_set_footprint_limit(size_t bytes) {
+    size_t result; // invert sense of 0
+    if (bytes == 0)
+        result = mparams.granularity_align(1); // Use minimal size
+    else if (bytes == MAX_SIZE_T)
+        result = 0; // disable
+    else
+        result = mparams.granularity_align(bytes);
+    return gm->_footprint_limit = result;
+}
+
+dlmallinfo dlmalloc_info(void) {
+    return gm->internal_mallinfo();
+}
+
+#if !NO_MALLOC_STATS
+void dlmalloc_stats() {
+    gm->internal_malloc_stats();
+}
+#endif
+
+int dlmallopt(int param_number, int value) {
+    return mparams.change(param_number, value);
+}
+
+// Returns the payload bytes available in the chunk holding `mem`, or 0 when
+// `mem` is null or its chunk is not marked in use.
+size_t dlmalloc_usable_size(void* mem) {
+    if (mem != 0) {
+        mchunkptr p = mem2chunk(mem);
+        if (p->is_inuse())
+            return p->chunksize() - p->overhead_for();
+    }
+    return 0;
+}
+
+#endif /* !ONLY_MSPACES */
+
+
+/* ----------------------------- user mspaces ---------------------------- */
+
+#if MSPACES
+
+// Not sure that using spaces is worth the trouble. Each allocation
+// then needs FOOTER or has to be freed back to the mspace that it
+// originated from. The allocator is already complicated enough.
+// Basically this does a virtual allocate on Win for each mspace.
+// So that can grow in its own address range.
+//
+// mimalloc from Microsoft looks pretty good too.
+
+
+//static mstate init_user_mstate(mstate ms, flag_t flags, char* tbase, size_t tsize) {
+//    // This is allocating mstate out of passed memory
+//    //size_t msize = pad_request(sizeof(malloc_state));
+//    //mchunkptr msp = align_as_chunk(tbase);
+//    //mstate m = (mstate)(chunk2mem(msp));
+//    // TODO: memset is bad for any types added to class (f.e. mutex)
+//    //memset(m, 0, msize);
+//    //
+//    //(void)INITIAL_LOCK(&m->get_mutex());
+//    //msp->_head = (msize | INUSE_BITS);
+//
+//
+//    return m;
+//}
+
+
+
+// Creates a space backed by a freshly mmapped region of at least `capacity`
+// bytes (one granularity unit when capacity is 0). `locked` is forwarded to
+// set_lock(). Returns false, with the state released, when the capacity is
+// too large to pad without wrapping or when the mapping fails.
+bool mspace::create(size_t capacity, int locked) {
+    if (!ms)
+        ms = make_unique<malloc_state>(); // alloc on sys heap
+    mparams.ensure_initialization();
+    size_t msize = 0; // pad_request(sizeof(malloc_state));
+    // refuse capacities so large that adding the bookkeeping pad would wrap
+    if (capacity >= (size_t) -(msize + TOP_FOOT_SIZE + mparams._page_size)) {
+        ms.reset();
+        return false;
+    }
+    size_t rs = ((capacity == 0)? mparams._granularity :
+                 (capacity + TOP_FOOT_SIZE + msize));
+    size_t tsize = mparams.granularity_align(rs);
+    char* tbase = (char*)(CALL_MMAP(tsize));
+
+    if (tbase == CMFAIL) {
+        ms.reset();
+        return false;
+    }
+
+    //init_user_mstate(ms, USE_MMAP_BIT, tbase, tsize);
+    ms->init(tbase, tsize);
+    ms->_seg._sflags = USE_MMAP_BIT;
+    ms->set_lock(locked);
+
+    return true;
+}
+
+// Creates a space over caller-owned memory [base, base + capacity). The
+// segment is flagged EXTERN_BIT, so destroy() does not unmap it. Returns
+// false, with the state released, when base is null or capacity cannot hold
+// the top footer (or is large enough to wrap).
+bool mspace::create_with_base(void* base, size_t capacity, int locked) {
+    if (!ms)
+        ms = make_unique<malloc_state>(); // alloc on sys heap
+
+    mparams.ensure_initialization();
+    //size_t msize = pad_request(sizeof(malloc_state));
+    // init() subtracts TOP_FOOT_SIZE from capacity, so it must be strictly larger
+    if (base == 0 || capacity <= TOP_FOOT_SIZE ||
+        capacity >= (size_t) -(TOP_FOOT_SIZE + mparams._page_size)) {
+        ms.reset();
+        return false;
+    }
+    ms->init((char*)base, capacity);
+    //init_user_mstate(ms, EXTERN_BIT, (char*)base, capacity);
+    ms->_seg._sflags = EXTERN_BIT;
+    ms->set_lock(locked);
+    return true;
+}
+
+// Enables (enable != 0) or disables tracking of large chunks by turning
+// direct mmap off or on. Returns 1 if mmap was already disabled before the
+// call, else 0.
+int mspace::track_large_chunks(int enable) {
+    int ret = 0;
+    //mstate ms = (mstate)msp;
+    //{
+    PREACTION(ms);
+    if (!ms->use_mmap()) {
+        ret = 1;
+    }
+    if (!enable) {
+        ms->enable_mmap();
+    } else {
+        ms->disable_mmap();
+    }
+    POSTACTION(ms);
+    //}
+    return ret;
+}
+
+size_t
mspace::destroy() {
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+
+    size_t freed = 0;
+    msegmentptr sp = &ms->_seg;
+    //(void)DESTROY_LOCK(&ms->get_mutex()); // destroy before unmapped
+    while (sp != 0) {
+        // read everything needed from the record before its segment is unmapped
+        char* base = sp->_base;
+        size_t size = sp->_size;
+        flag_t flag = sp->_sflags;
+        (void)base; // placate people compiling -Wunused-variable
+        sp = sp->_next;
+        // only unmap segments this space mapped itself, never caller-owned ones
+        if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) &&
+            CALL_MUNMAP(base, size) == 0)
+            freed += size;
+    }
+
+    ms.reset();
+
+    return freed;
+}
+
+// ---------------------------- mspace versions of malloc/calloc/free routines --------------------
+void* mspace::_malloc(size_t bytes) {
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+    return ms->_malloc(bytes);
+}
+
+// Frees `mem` (no-op for null). Converts the user pointer to its chunk and,
+// with FOOTERS, frees into the state recorded in the chunk rather than `ms`.
+void mspace::_free(void* mem) {
+    if (mem != 0) {
+        mchunkptr p = mem2chunk(mem);
+#if FOOTERS
+        mstate fm = get_mstate_for(p);
+#else
+        mstate fm = ms.get();
+#endif
+        if (!fm->ok_magic()) {
+            USAGE_ERROR_ACTION(fm, p);
+            return;
+        }
+        fm->_free(p);
+    }
+}
+
+void* mspace::calloc(size_t n_elements, size_t elem_size) {
+    void* mem;
+    size_t req = 0;
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+    if (n_elements != 0) {
+        req = n_elements * elem_size;
+        // only pay for the division when either operand is large enough to overflow
+        if (((n_elements | elem_size) & ~(size_t)0xffff) &&
+            (req / n_elements != elem_size))
+            req = MAX_SIZE_T; // force downstream failure on overflow
+    }
+    mem = ms->internal_malloc(req);
+    if (mem != 0 && mem2chunk(mem)->calloc_must_clear())
+        memset(mem, 0, req);
+    return mem;
+}
+
+// Resizes `oldmem` to `bytes`. Tries to resize the chunk first (moving is
+// allowed); otherwise allocates a new block, copies min(old, new) bytes and
+// frees the old chunk. Returns 0 on failure, leaving `oldmem` untouched.
+void* mspace::realloc(void* oldmem, size_t bytes) {
+    void* mem = 0;
+    if (oldmem == 0) {
+        mem = ms->_malloc(bytes);
+    }
+    else if (bytes >= MAX_REQUEST) {
+        MALLOC_FAILURE_ACTION;
+    }
+#ifdef REALLOC_ZERO_BYTES_FREES
+    else if (bytes == 0) {
+        // go through mspace::_free, which maps the user pointer to its chunk
+        _free(oldmem);
+    }
+#endif
+    else {
+        size_t nb = request2size(bytes);
+        mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+        mstate m = ms.get();
+#else
+        mstate m = get_mstate_for(oldp);
+        if (!m->ok_magic()) {
+            USAGE_ERROR_ACTION(m, oldmem);
+            return 0;
+        }
+#endif
+        {
+            PREACTION(m);
+            mchunkptr newp = m->try_realloc_chunk(oldp, nb, 1);
+            POSTACTION(m);
+            if (newp != 0) {
+                m->check_inuse_chunk(newp);
+                mem = chunk2mem(newp);
+            }
+            else {
+                mem = m->_malloc(bytes);
+                if (mem != 0) {
+                    size_t oc = oldp->chunksize() - oldp->overhead_for();
+                    memcpy(mem, oldmem, (oc < bytes)? oc : bytes);
+                    // malloc_state::_free takes the chunk header, not the user pointer
+                    m->_free(oldp);
+                }
+            }
+        }
+    }
+    return mem;
+}
+
+// Like realloc, but never moves the block: returns `oldmem` when the chunk
+// could be resized where it is, otherwise 0.
+void* mspace::realloc_in_place(void* oldmem, size_t bytes) {
+    void* mem = 0;
+    if (oldmem != 0) {
+        if (bytes >= MAX_REQUEST) {
+            MALLOC_FAILURE_ACTION;
+        }
+        else {
+            size_t nb = request2size(bytes);
+            mchunkptr oldp = mem2chunk(oldmem);
+#if ! FOOTERS
+            mstate m = ms.get();
+#else
+            mstate m = get_mstate_for(oldp);
+            if (!m->ok_magic()) {
+                USAGE_ERROR_ACTION(m, oldmem);
+                return 0;
+            }
+#endif
+            {
+                PREACTION(m);
+                mchunkptr newp = m->try_realloc_chunk(oldp, nb, 0);
+                POSTACTION(m);
+                if (newp == oldp) {
+                    m->check_inuse_chunk(newp);
+                    mem = oldmem;
+                }
+            }
+        }
+    }
+    return mem;
+}
+
+void* mspace::memalign(size_t alignment, size_t bytes) {
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+    // ordinary allocations already satisfy alignments up to MALLOC_ALIGNMENT
+    if (alignment <= MALLOC_ALIGNMENT)
+        return _malloc(bytes);
+    return ms->internal_memalign(alignment, bytes);
+}
+
+void** mspace::independent_calloc(size_t n_elements,
+                                  size_t elem_size, void* chunks[]) {
+    size_t sz = elem_size; // serves as 1-element array
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+    // opts 3: all elements the same size (bit 0) and zeroed (bit 1)
+    return ms->ialloc(n_elements, &sz, 3, chunks);
+}
+
+void** mspace::independent_comalloc(size_t n_elements,
+                                    size_t sizes[], void* chunks[]) {
+    //mstate ms = (mstate)msp;
+    if (!ms->ok_magic()) {
+        USAGE_ERROR_ACTION(ms,ms);
+        return 0;
+    }
+    return ms->ialloc(n_elements,
sizes, 0, chunks);
+}
+
+size_t mspace::bulk_free(void* array[], size_t nelem) {
+    return ms->internal_bulk_free(array, nelem);
+}
+
+#if MALLOC_INSPECT_ALL
+// Reports every chunk of this space to `handler` while holding the lock.
+void mspace::inspect_all(
+    void(*handler)(void *start,
+                   void *end,
+                   size_t used_bytes,
+                   void* callback_arg),
+    void* arg) {
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        {
+            PREACTION(ms);
+            // internal_inspect_all is a malloc_state member taking (handler, arg)
+            ms->internal_inspect_all(handler, arg);
+            POSTACTION(ms);
+        }
+    }
+    else {
+        USAGE_ERROR_ACTION(ms,ms);
+    }
+}
+#endif // MALLOC_INSPECT_ALL
+
+// Releases unused memory back to the system, keeping `pad` bytes of slack.
+// Returns 1 if any memory was released, else 0.
+int mspace::trim(size_t pad) {
+    int result = 0;
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        {
+            PREACTION(ms);
+            result = ms->sys_trim(pad);
+            POSTACTION(ms);
+        }
+    }
+    else {
+        USAGE_ERROR_ACTION(ms,ms);
+    }
+    return result;
+}
+
+#if !NO_MALLOC_STATS
+void mspace::malloc_stats() {
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        ms->internal_malloc_stats();
+    }
+    else {
+        USAGE_ERROR_ACTION(ms,ms);
+    }
+}
+#endif // !NO_MALLOC_STATS
+
+size_t mspace::footprint() {
+    size_t result = 0;
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        result = ms->_footprint;
+    }
+    else {
+        USAGE_ERROR_ACTION(ms,ms);
+    }
+    return result;
+}
+
+size_t mspace::max_footprint() {
+    size_t result = 0;
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        result = ms->_max_footprint;
+    }
+    else {
+        USAGE_ERROR_ACTION(ms,ms);
+    }
+    return result;
+}
+
+// A stored limit of 0 means "no limit" and is reported as MAX_SIZE_T.
+size_t mspace::footprint_limit() {
+    size_t result = 0;
+    //mstate ms = (mstate)msp;
+    if (ms->ok_magic()) {
+        size_t maf = ms->_footprint_limit;
+        result = (maf == 0) ?
MAX_SIZE_T : maf; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +size_t mspace::set_footprint_limit(size_t bytes) { + size_t result = 0; + //mstate ms = (mstate)msp; + if (ms->ok_magic()) { + if (bytes == 0) + result = mparams.granularity_align(1); // Use minimal size + else if (bytes == MAX_SIZE_T) // else-if: a plain if let the final else overwrite the bytes == 0 result + result = 0; // disable + else + result = mparams.granularity_align(bytes); + ms->_footprint_limit = result; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + + +dlmallinfo mspace::mallinfo() { + //mstate ms = (mstate)msp; + if (!ms->ok_magic()) { + USAGE_ERROR_ACTION(ms,ms); + } + return ms->internal_mallinfo(); +} + +size_t mspace::usable_size(const void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (p->is_inuse()) + return p->chunksize() - p->overhead_for(); + } + return 0; +} + +int mspace::mallopt(int param_number, int value) { + return mparams.change(param_number, value); +} + +#endif // MSPACES +} diff --git a/libkram/allocate/dlmalloc.h b/libkram/allocate/dlmalloc.h new file mode 100644 index 00000000..b02bcfdd --- /dev/null +++ b/libkram/allocate/dlmalloc.h @@ -0,0 +1,644 @@ +// See dlmalloc.c for license +#pragma once + +#include // for size_t + +namespace kram { +using namespace STL_NAMESPACE; + +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The mallinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields are + instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. 
If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. + */ +// intended to match mallinfo in "/usr/include/malloc.h" +struct dlmallinfo { + size_t arena; /* non-mmapped space allocated from system */ + size_t ordblks; /* number of free chunks */ + size_t smblks; /* always 0 */ + size_t hblks; /* always 0 */ + size_t hblkhd; /* space in mmapped regions */ + size_t usmblks; /* maximum total allocated space */ + size_t fsmblks; /* always 0 */ + size_t uordblks; /* total allocated space */ + size_t fordblks; /* total free space */ + size_t keepcost; /* releasable (via malloc_trim) space */ +}; + +// can setup as dll, but no one does +#define DLMALLOC_EXPORT extern +// This is noexcept +#define DLTHROW throw () + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. + */ +DLMALLOC_EXPORT void* dlmalloc(size_t) DLTHROW; + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. 
If p was not malloced or already + freed, free(p) will by default cause the current program to abort. + */ +DLMALLOC_EXPORT void dlfree(void*) DLTHROW; + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. + */ +DLMALLOC_EXPORT void* dlcalloc(size_t, size_t) DLTHROW; + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. + */ +DLMALLOC_EXPORT void* dlrealloc(void*, size_t) DLTHROW; + +/* + realloc_in_place(void* p, size_t n) + Resizes the space allocated for p to size n, only if this can be + done without moving p (i.e., only if there is adjacent space + available if n is greater than p's current allocated size, or n is + less than or equal to p's size). This may be used instead of plain + realloc if an alternative allocation strategy is needed upon failure + to expand space; for example, reallocation of a buffer that must be + memory-aligned or cleared. You can use realloc_in_place to trigger + these alternatives only when needed. + + Returns p if successful; otherwise null. 
+ */ +DLMALLOC_EXPORT void* dlrealloc_in_place(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. + */ +DLMALLOC_EXPORT void* dlmemalign(size_t, size_t); + +/* + int posix_memalign(void** pp, size_t alignment, size_t n); + Allocates a chunk of n bytes, aligned in accord with the alignment + argument. Differs from memalign only in that it (1) assigns the + allocated memory to *pp rather than returning it, (2) fails and + returns EINVAL if the alignment is not a power of two (3) fails and + returns ENOMEM if memory cannot be allocated. + */ +DLMALLOC_EXPORT int dlposix_memalign(void**, size_t, size_t) DLTHROW; + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. + */ +DLMALLOC_EXPORT void* dlvalloc(size_t) DLTHROW; + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters. The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. To work around the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. 
Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) + */ +DLMALLOC_EXPORT int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. + */ +DLMALLOC_EXPORT size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. + */ +DLMALLOC_EXPORT size_t dlmalloc_max_footprint(void); + +/* + malloc_footprint_limit(); + Returns the number of bytes that the heap is allowed to obtain from + the system, returning the last value returned by + malloc_set_footprint_limit, or the maximum size_t value if + never set. The returned value reflects a permission. There is no + guarantee that this number of bytes can actually be obtained from + the system. 
+ */ +DLMALLOC_EXPORT size_t dlmalloc_footprint_limit(); + +/* + malloc_set_footprint_limit(); + Sets the maximum number of bytes to obtain from the system, causing + failure returns from malloc and related functions upon attempts to + exceed this value. The argument value may be subject to page + rounding to an enforceable limit; this actual value is returned. + Using an argument of the maximum possible size_t effectively + disables checks. If the argument is less than or equal to the + current malloc_footprint, then all future allocations that require + additional system memory will fail. However, invocation cannot + retroactively deallocate existing used memory. + */ +DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes); + +#if MALLOC_INSPECT_ALL +/* + malloc_inspect_all(void(*handler)(void *start, + void *end, + size_t used_bytes, + void* callback_arg), + void* arg); + Traverses the heap and calls the given handler for each managed + region, skipping all bytes that are (or may be) used for bookkeeping + purposes. Traversal does not include chunks that have been + directly memory mapped. Each reported region begins at the start + address, and continues up to but not including the end address. The + first used_bytes of the region contain allocated data. If + used_bytes is zero, the region is unallocated. The handler is + invoked with the given callback argument. If locks are defined, they + are held during the entire traversal. It is a bad idea to invoke + other malloc functions from within the handler. + + For example, to count the number of in-use chunks with size greater + than 1000, you could write: + static int count = 0; + void count_chunks(void* start, void* end, size_t used, void* arg) { + if (used >= 1000) ++count; + } + then: + malloc_inspect_all(count_chunks, NULL); + + malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. 
+ */ +DLMALLOC_EXPORT void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), + void* arg); + +#endif /* MALLOC_INSPECT_ALL */ + +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. + */ +DLMALLOC_EXPORT dlmallinfo dlmalloc_info(void); + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. 
+ + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0)); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (int i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } + */ +DLMALLOC_EXPORT void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. 
It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be freed when it is no longer needed. This can be + done all at once using bulk_free. + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. + */ +DLMALLOC_EXPORT void** dlindependent_comalloc(size_t, size_t*, void**); + +/* + bulk_free(void* array[], size_t n_elements) + Frees and clears (sets to null) each non-null pointer in the given + array. This is likely to be faster than freeing them one-by-one. + If footers are used, pointers that have been allocated in different + mspaces are not freed or cleared, and the count of all such pointers + is returned. 
For large arrays of pointers with poor locality, it + may be worthwhile to sort this array before calling bulk_free. + */ +DLMALLOC_EXPORT size_t dlbulk_free(void**, size_t n_elements); + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +DLMALLOC_EXPORT void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. + */ +DLMALLOC_EXPORT int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. 
Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + */ +DLMALLOC_EXPORT void dlmalloc_stats(void); + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); + */ +size_t dlmalloc_usable_size(void*); + +//#endif /* ONLY_MSPACES */ + +// This is only supported if mmap is too +//#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. + */ +//typedef void* mspace; +class mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). 
+ */ +//DLMALLOC_EXPORT mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. + */ +//DLMALLOC_EXPORT size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. + */ +//DLMALLOC_EXPORT mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +using mstate = class malloc_state*; + +class mspace { +public: + // see comments above + bool create(size_t capacity, int locked); + bool create_with_base(void* base, size_t capacity, int locked); + size_t destroy(); + + /* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. + */ + int track_large_chunks(int enable); + + + /* + mspace_malloc behaves as malloc, but operates within + the given space. + */ + void* _malloc(size_t bytes); + + /* + mspace_free behaves as free, but operates within + the given space. 
+ + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. + */ + void _free(void* mem); + + /* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. + */ + void* realloc(void* mem, size_t newsize); + + /* + mspace_calloc behaves as calloc, but operates within + the given space. + */ + void* calloc(size_t n_elements, size_t elem_size); + + /* + mspace_memalign behaves as memalign, but operates within + the given space. + */ + void* memalign(size_t alignment, size_t bytes); + + /* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. + */ + void** independent_calloc(size_t n_elements, + size_t elem_size, void* chunks[]); + + /* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. + */ + void** independent_comalloc(size_t n_elements, + size_t sizes[], void* chunks[]); + + /* + mspace_footprint() returns the number of bytes obtained from the + system for this space. + */ + size_t footprint(); + + /* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. + */ + size_t max_footprint(); + + + /* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. + */ + dlmallinfo mallinfo(); + + /* + malloc_usable_size(void* p) behaves the same as malloc_usable_size; + */ + size_t usable_size(const void* mem); + + /* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. + */ + void malloc_stats(); + + /* + mspace_trim behaves as malloc_trim, but + operates within the given space. 
+ */ + int trim(size_t pad); + + /* + An alias for mallopt. + */ + int mallopt(int, int); + +private: + void* realloc_in_place(void* oldmem, size_t bytes); + size_t bulk_free(void* array[], size_t nelem); + size_t footprint_limit(); + size_t set_footprint_limit(size_t bytes); + + unique_ptr ms; +}; + +//#endif /* MSPACES */ + +#undef DLMALLOC_EXPORT +#undef DLTHROW + +} diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.cpp b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp index fbf01a97..04afa825 100644 --- a/libkram/astc-encoder/astcenc_diagnostic_trace.cpp +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +//#include #include "astcenc_diagnostic_trace.h" diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.h b/libkram/astc-encoder/astcenc_diagnostic_trace.h index 61489498..96c97c87 100644 --- a/libkram/astc-encoder/astcenc_diagnostic_trace.h +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.h @@ -158,7 +158,7 @@ class TraceLog /** * @brief The stack of nodes (newest at the back). */ - std::vector m_stack; + vector m_stack; private: /** diff --git a/libkram/astc-encoder/astcenc_internal.h b/libkram/astc-encoder/astcenc_internal.h index aa7f6001..0c689ac0 100644 --- a/libkram/astc-encoder/astcenc_internal.h +++ b/libkram/astc-encoder/astcenc_internal.h @@ -22,15 +22,21 @@ #ifndef ASTCENC_INTERNAL_INCLUDED #define ASTCENC_INTERNAL_INCLUDED -#include -#include +//#include +//#include + +#define ASTCENC_USE_THREADS 0 +#if ASTCENC_USE_THREADS + // these pull in string from system_error which is slow to instantiate on macOS + #include + #include + #include +#endif + #include #include #include #include -#include -#include -#include #include #include "astcenc.h" @@ -210,21 +216,27 @@ static_assert((WEIGHTS_MAX_BLOCK_MODES % ASTCENC_SIMD_WIDTH) == 0, class ParallelManager { private: + +#if ASTCENC_USE_THREADS /** @brief Lock used for critical section and condition synchronization. 
*/ + std::mutex m_lock; + /** @brief Condition variable for tracking stage processing completion. */ + std::condition_variable m_complete; + + /** @brief Number of tasks started, but not necessarily finished. */ + std::atomic m_start_count; +#else + + unsigned int m_start_count; +#endif + /** @brief True if the stage init() step has been executed. */ bool m_init_done; /** @brief True if the stage term() step has been executed. */ bool m_term_done; - /** @brief Contition variable for tracking stage processing completion. */ - std::condition_variable m_complete; - - /** @brief Number of tasks started, but not necessarily finished. */ - std::atomic m_start_count; - /** @brief Number of tasks finished. */ unsigned int m_done_count; @@ -253,6 +265,82 @@ class ParallelManager m_task_count = 0; } +#if !ASTCENC_USE_THREADS + void init(std::function init_func) + { + if (!m_init_done) + { + m_task_count = init_func(); + m_init_done = true; + } + } + + void init(unsigned int task_count) + { + if (!m_init_done) + { + m_task_count = task_count; + m_init_done = true; + } + } + + unsigned int get_task_assignment(unsigned int granule, unsigned int& count) + { + // match fetch_add, which adds to m_start_count atomically and then returns the original m_start_count + unsigned int base = m_start_count; + m_start_count += granule; + + if (base >= m_task_count) + { + count = 0; + return 0; + } + + count = astc::min(m_task_count - base, granule); + return base; + } + + void complete_task_assignment(unsigned int count) + { + // Note: m_done_count cannot use an atomic without the mutex; this has a race between the + // update here and the wait() for other threads + m_done_count += count; + +// if (m_done_count == m_task_count) +// { +// lck.unlock(); +// m_complete.notify_all(); +// } + } + + /** + * @brief Wait for stage processing to complete. + */ + void wait() + { + // no wait + } + + /** + * @brief Trigger the pipeline stage term step. + * + * This can be called from multi-threaded code. 
The first thread to hit this will process the + * thread termination. Caller must have called @c wait() prior to calling this function to ensure + * that processing is complete. + * + * @param term_func Callable which executes the stage termination. + */ + void term(std::function term_func) + { + if (!m_term_done) + { + term_func(); + m_term_done = true; + } + } + +#else + /** * @brief Trigger the pipeline stage init step. * @@ -361,6 +449,8 @@ class ParallelManager m_term_done = true; } } + +#endif }; /* ============================================================================ diff --git a/libkram/astc-encoder/astcenc_mathlib.cpp b/libkram/astc-encoder/astcenc_mathlib.cpp index f276ac7e..82d5a1b8 100644 --- a/libkram/astc-encoder/astcenc_mathlib.cpp +++ b/libkram/astc-encoder/astcenc_mathlib.cpp @@ -46,3 +46,76 @@ uint64_t astc::rand(uint64_t state[2]) state[1] = rotl(s1, 37); return res; } + +#if SIMD_SSE + +/* ============================================================================ + Softfloat library with fp32 and fp16 conversion functionality. +============================================================================ */ +//#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) +// /* narrowing float->float conversions */ +// uint16_t float_to_sf16(float val); +// float sf16_to_float(uint16_t val); +//#endif + +vint4 float_to_float16(vfloat4 a) +{ +//#if ASTCENC_F16C >= 1 + __m128i packedf16 = _mm_cvtps_ph(a.m, 0); + __m128i f16 = _mm_cvtepu16_epi32(packedf16); + return vint4(f16); +//#else +// return vint4( +// float_to_sf16(a.lane<0>()), +// float_to_sf16(a.lane<1>()), +// float_to_sf16(a.lane<2>()), +// float_to_sf16(a.lane<3>())); +//#endif +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. 
+ */ +uint16_t float_to_float16(float a) +{ +//#if ASTCENC_F16C >= 1 + __m128i f16 = _mm_cvtps_ph(_mm_set1_ps(a), 0); + return static_cast(_mm_cvtsi128_si32(f16)); +//#else +// return float_to_sf16(a); +//#endif +} + +/** + * @brief Return a float value for a float16 vector. + */ +vfloat4 float16_to_float(vint4 a) +{ +//#if ASTCENC_F16C >= 1 + __m128i packed = _mm_packs_epi32(a.m, a.m); + __m128 f32 = _mm_cvtph_ps(packed); + return vfloat4(f32); +//#else +// return vfloat4( +// sf16_to_float(a.lane<0>()), +// sf16_to_float(a.lane<1>()), +// sf16_to_float(a.lane<2>()), +// sf16_to_float(a.lane<3>())); +//#endif +} + +/** + * @brief Return a float value for a float16 scalar. + */ +float float16_to_float(uint16_t a) +{ +//#if ASTCENC_F16C >= 1 + __m128i packed = _mm_set1_epi16(a); + __m128 f32 = _mm_cvtph_ps(packed); + return _mm_cvtss_f32(f32); +//#else +// return sf16_to_float(a); +//#endif +} + +#endif diff --git a/libkram/astc-encoder/astcenc_mathlib.h b/libkram/astc-encoder/astcenc_mathlib.h index 67e989e7..ebad96c3 100644 --- a/libkram/astc-encoder/astcenc_mathlib.h +++ b/libkram/astc-encoder/astcenc_mathlib.h @@ -27,62 +27,81 @@ #include #include -#ifndef ASTCENC_POPCNT - #if defined(__POPCNT__) - #define ASTCENC_POPCNT 1 +#ifndef ASTCENC_NEON + #if defined(__aarch64__) + #define ASTCENC_NEON 1 #else - #define ASTCENC_POPCNT 0 + #define ASTCENC_NEON 0 #endif #endif -#ifndef ASTCENC_F16C - #if defined(__F16C__) - #define ASTCENC_F16C 1 - #else - #define ASTCENC_F16C 0 - #endif -#endif +#if ASTCENC_NEON -#ifndef ASTCENC_SSE - #if defined(__SSE4_2__) - #define ASTCENC_SSE 42 - #elif defined(__SSE4_1__) - #define ASTCENC_SSE 41 - #elif defined(__SSE3__) - #define ASTCENC_SSE 30 - #elif defined(__SSE2__) - #define ASTCENC_SSE 20 - #else + // Intel simd ops #define ASTCENC_SSE 0 - #endif -#endif - -#ifndef ASTCENC_AVX - #if defined(__AVX2__) - #define ASTCENC_AVX 2 - #elif defined(__AVX__) - #define ASTCENC_AVX 1 - #else #define ASTCENC_AVX 0 - #endif -#endif 
-#ifndef ASTCENC_NEON - #if defined(__aarch64__) - #define ASTCENC_NEON 1 - #else - #define ASTCENC_NEON 0 - #endif -#endif + // Keep alignment at 16B + #define ASTCENC_VECALIGN 16 + + // These have equivalents in Neon + #define ASTCENC_POPCNT 0 + #define ASTCENC_F16C 0 + -#if ASTCENC_AVX - #define ASTCENC_VECALIGN 32 #else - #define ASTCENC_VECALIGN 16 -#endif -#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 - #include + #ifndef ASTCENC_SSE + #if defined(__SSE4_2__) + #define ASTCENC_SSE 42 + #elif defined(__SSE4_1__) + #define ASTCENC_SSE 41 + #elif defined(__SSE3__) + #define ASTCENC_SSE 30 + #elif defined(__SSE2__) + #define ASTCENC_SSE 20 + #else + #define ASTCENC_SSE 0 + #endif + #endif + + #ifndef ASTCENC_AVX + #if defined(__AVX2__) + #define ASTCENC_AVX 2 + #elif defined(__AVX__) + #define ASTCENC_AVX 1 + #else + #define ASTCENC_AVX 0 + #endif + #endif + + // must set -fpopcount + #ifndef ASTCENC_POPCNT + #if defined(__POPCNT__) + #define ASTCENC_POPCNT 1 + #else + #define ASTCENC_POPCNT 0 + #endif + #endif + + // must set -mf16c only on x86_64 build, avx not enough on clang + #ifndef ASTCENC_F16C + #if defined(__F16C__) + #define ASTCENC_F16C 1 + #else + #define ASTCENC_F16C 0 + #endif + #endif + + //#if ASTCENC_AVX + // #define ASTCENC_VECALIGN 32 + //#else + #define ASTCENC_VECALIGN 16 + //#endif + + #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 || ASTCENC_F16C != 0 + #include + #endif #endif /* ============================================================================ @@ -418,15 +437,6 @@ uint64_t rand(uint64_t state[2]); } -/* ============================================================================ - Softfloat library with fp32 and fp16 conversion functionality. 
-============================================================================ */ -#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) - /* narrowing float->float conversions */ - uint16_t float_to_sf16(float val); - float sf16_to_float(uint16_t val); -#endif - /********************************* Vector library *********************************/ diff --git a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp index d95fb9da..f3f7bd54 100644 --- a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp +++ b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp @@ -18,7 +18,8 @@ /** * @brief Soft-float library for IEEE-754. */ -#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) +// Chopped out +#if 0 // (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) #include "astcenc_mathlib.h" diff --git a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp index 4158da31..b22cc1bf 100644 --- a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp +++ b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp @@ -47,7 +47,7 @@ static bool g_cpu_has_f16c { false }; ============================================================================ */ #if !defined(__clang__) && defined(_MSC_VER) #define WIN32_LEAN_AND_MEAN -#include +#include #include /** diff --git a/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h index fe8a1b16..360c0969 100755 --- a/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h +++ b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h @@ -125,7 +125,11 @@ struct vfloat8 */ static ASTCENC_SIMD_INLINE vfloat8 loada(const float* p) { +#if ASTCENC_VECALIGN == 32 return vfloat8(_mm256_load_ps(p)); +#else // 16 + return vfloat8(_mm256_loadu_ps(p)); +#endif } /** @@ -242,7 +246,11 @@ struct vint8 */ static ASTCENC_SIMD_INLINE vint8 loada(const int* p) { +#if ASTCENC_VECALIGN == 32 return vint8(_mm256_load_si256(reinterpret_cast(p))); 
+#else // 16 + return vint8(_mm256_loadu_si256(reinterpret_cast(p))); +#endif } /** @@ -534,7 +542,11 @@ ASTCENC_SIMD_INLINE vint8 hmax(vint8 a) */ ASTCENC_SIMD_INLINE void storea(vint8 a, int* p) { - _mm256_store_si256(reinterpret_cast<__m256i*>(p), a.m); +#if ASTCENC_VECALIGN == 32 + _mm256_store_si256(reinterpret_cast<__m256i*>(p), a.m); +#else // 16 + _mm256_storeu_si256(reinterpret_cast<__m256i*>(p), a.m); +#endif } /** @@ -961,7 +973,11 @@ ASTCENC_SIMD_INLINE void store(vfloat8 a, float* p) */ ASTCENC_SIMD_INLINE void storea(vfloat8 a, float* p) { - _mm256_store_ps(p, a.m); +#if ASTCENC_VECALIGN == 32 + _mm256_store_ps(p, a.m); +#else // 16 + _mm256_storeu_ps(p, a.m); +#endif } /** diff --git a/libkram/astc-encoder/astcenc_vecmathlib_none_4.h b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h index 5a399ef5..6aad161e 100644 --- a/libkram/astc-encoder/astcenc_vecmathlib_none_4.h +++ b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h @@ -40,7 +40,7 @@ #error "Include astcenc_vecmathlib.h, do not include directly" #endif -#include +//#include #include #include #include diff --git a/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h index 868522dc..aaf5dccb 100755 --- a/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h +++ b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h @@ -941,65 +941,23 @@ ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) /** * @brief Return a float16 value for a float vector, using round-to-nearest. */ -ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) -{ -#if ASTCENC_F16C >= 1 - __m128i packedf16 = _mm_cvtps_ph(a.m, 0); - __m128i f16 = _mm_cvtepu16_epi32(packedf16); - return vint4(f16); -#else - return vint4( - float_to_sf16(a.lane<0>()), - float_to_sf16(a.lane<1>()), - float_to_sf16(a.lane<2>()), - float_to_sf16(a.lane<3>())); -#endif -} +vint4 float_to_float16(vfloat4 a); /** * @brief Return a float16 value for a float scalar, using round-to-nearest. 
*/ -static inline uint16_t float_to_float16(float a) -{ -#if ASTCENC_F16C >= 1 - __m128i f16 = _mm_cvtps_ph(_mm_set1_ps(a), 0); - return static_cast(_mm_cvtsi128_si32(f16)); -#else - return float_to_sf16(a); -#endif -} +uint16_t float_to_float16(float a); /** * @brief Return a float value for a float16 vector. */ -ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) -{ -#if ASTCENC_F16C >= 1 - __m128i packed = _mm_packs_epi32(a.m, a.m); - __m128 f32 = _mm_cvtph_ps(packed); - return vfloat4(f32); -#else - return vfloat4( - sf16_to_float(a.lane<0>()), - sf16_to_float(a.lane<1>()), - sf16_to_float(a.lane<2>()), - sf16_to_float(a.lane<3>())); -#endif -} +vfloat4 float16_to_float(vint4 a); + /** * @brief Return a float value for a float16 scalar. */ -ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) -{ -#if ASTCENC_F16C >= 1 - __m128i packed = _mm_set1_epi16(a); - __m128 f32 = _mm_cvtph_ps(packed); - return _mm_cvtss_f32(f32); -#else - return sf16_to_float(a); -#endif -} +float float16_to_float(uint16_t a); /** * @brief Return a float value as an integer bit pattern (i.e. no conversion). 
diff --git a/libkram/ate/ateencoder.mm b/libkram/ate/ateencoder.mm index 1836ebda..265110a8 100644 --- a/libkram/ate/ateencoder.mm +++ b/libkram/ate/ateencoder.mm @@ -2,7 +2,7 @@ #if COMPILE_ATE -#include +//#include #include "KTXImage.h" // for MyMTLPixelFormat // this contains ATE encoder (libate.dylib) @@ -438,7 +438,7 @@ inline my_at_block_format_t pixelToDecoderFormat(MyMTLPixelFormat format, bool i // decode is leaving a=60 for some bizarro reason, so correct that if (srcAlphaType == at_alpha_opaque) { - for (uint32_t i = 0, iEnd = w*h; i < iEnd; ++i) + for (uint32_t i = 0, iEnd = (uint32_t)w*h; i < iEnd; ++i) { dstData[4*i+3] = 255; } diff --git a/libkram/bc7enc/LICENSE b/libkram/bc7enc/LICENSE index 71e10daf..b3b1f69b 100644 --- a/libkram/bc7enc/LICENSE +++ b/libkram/bc7enc/LICENSE @@ -1,10 +1,12 @@ -The following source code files are available under 2 licenses -- choose whichever you prefer: -rgbcx.h -bc7decomp.cpp/h -bc7enc.c +If you use this software in a product, attribution / credits is requested but not required. + +bc7e.ispc uses the Apache 2.0 license and is Copyright (C) 2018-2021 Binomial LLC. +LodePNG is Copyright (c) 2005-2016 Lode Vandevenne. See LodePNG.cpp for its license. + +All other source code files in this repo are available under 2 licenses -- choose whichever you prefer. ALTERNATIVE A - MIT License -Copyright(c) 2020 Richard Geldreich, Jr. +Copyright(c) 2020-2021 Richard Geldreich, Jr. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal in the Software without restriction, including without limitation the rights to diff --git a/libkram/bc7enc/README.md b/libkram/bc7enc/README.md index 8f762b2d..b86777f5 100644 --- a/libkram/bc7enc/README.md +++ b/libkram/bc7enc/README.md @@ -1,146 +1,163 @@ -bc7enc - Fast, single source file BC1-5 and BC7/BPTC GPU texture encoders. 
+bc7enc - Fast BC1-7 GPU texture encoders with Rate Distortion Optimization (RDO) -Features: -- BC1/3 encoder (in [rgbcx.h](https://github.com/richgel999/bc7enc/blob/master/rgbcx.h)) uses a new algorithm (which we've named "prioritized cluster fit") which is 3-4x faster than traditional cluster fit (as implemented in [libsquish](https://github.com/svn2github/libsquish) with SSE2) at the same or slightly higher average quality using scalar CPU instructions. This algorithm is suitable for GPU encoder implementations. +This repo contains fast texture encoders for BC1-7. All formats support a simple post-processing transform on the encoded texture data designed to trade off quality for smaller compressed file sizes using LZ compression. Significant (10-50%) size reductions are possible. The BC7 encoder also supports a "reduced entropy" mode using the -e option which causes the output to be biased/weighted in various ways which minimally impact quality, which results in 5-10% smaller file sizes with no slowdowns in encoding time. -The BC1/BC3 encoder also implements [Castano's optimal endpoint rounding improvement](https://gist.github.com/castano/c92c7626f288f9e99e158520b14a61cf). +Currently, the entropy reduction transform is tuned for Deflate, LZHAM, or LZMA. The method used to control the rate-distortion tradeoff is the classic Lagrangian multiplier RDO method, modified to favor MSE on very smooth blocks. Rate is approximated using a fixed Deflate model. The post-processing transform applied to the encoded texture data tries to introduce the longest match it can into every encoded output block. It also tries to continue matches between blocks and (specifically for codecs like LZHAM/LZMA/Zstd) it tries to utilize REP0 (repeat) matches. -rgbcx's BC1 encoder is faster than both AMD Compressonator and libsquish at the same average quality. 
+You can see examples of the RDO BC7 encoder's current output [here](https://richg42.blogspot.com/2021/02/more-rdo-bc7-encoding.html). Some examples on how to use the command line tool are on my blog, [here](https://richg42.blogspot.com/2021/02/how-to-use-bc7encrdo.html). -- BC7 encoder (in bc7enc.c/.h) has perceptual colorspace metric support, and is very fast compared to ispc_texcomp (see below) for RGB textures. Important: The BC7 encoder included in this repo is still a work in progress. I took bc7enc16 and added more modes for better alpha support, but it needs more testing and development. +This repo contains both [bc7e.ispc](https://github.com/BinomialLLC/bc7e) and its distantly related but weaker 4 mode only non-ispc variant, bc7enc.cpp. By default, if you set SUPPORT_BC7E=TRUE when running cmake, you get bc7e.ispc, otherwise you get bc7enc.cpp. (The -C option forces bc7enc.cpp.) bc7e supports all BC7 modes and features, but doesn't yet support reduced entropy BC7 encoding. bc7enc.cpp supports optional reduced entropy encoding (using -e with the command line tool). RDO BC7 is supported when using either encoder, however. -- Full decoders for BC1-5/7. BC7 decoder is in bc7decomp.cpp/.h, BC1-5 decoders in rgbcx.h. +The next major focus will be improving the default smooth block handling and improving rate distortion performance. -This project is basically a demo of some of the techniques we use in Basis BC7, -which is Binomial's state of the art vectorized BC7 encoder. Basis BC7 is the -highest quality and fastest CPU BC7 encoder available (2-3x faster than -ispc_texcomp). It supports all modes and linear/perceptual colorspace metrics. -Licensees get full ISPC source code so they can customize the codec as needed. +This repo was originally derived from [bc7enc](https://github.com/richgel999/bc7enc) and [bc7e](https://github.com/BinomialLLC/bc7e). Note this repo contains the latest version of bc7e.ispc, which has a determinism bug fix.
-bc7enc currently only supports modes 1 and 6 for RGB, and modes 1, 5, 6, and 7 for alpha. The plan is to add all the modes. See the [bc7enc16](https://github.com/richgel999/bc7enc16) project for the previous version (which only supports modes 1 and 6). Note this readme still refers to "bc7enc16", but bc7enc is the same encoder but with more alpha modes. +**Note: If you use this software in a product, attribution / credits is requested but not required. Thanks!** -This codec supports a perceptual mode when encoding BC7, where it computes colorspace error in -weighted YCbCr space (like etc2comp), and it also supports weighted RGBA -metrics. It's particular strong in perceptual mode, beating the current state of -the art CPU encoder (Intel's ispc_texcomp) by a wide margin when measured by -Luma PSNR, even though it only supports 2 modes and isn't vectorized. +### Compiling -Why only modes 1 and 6 for opaque BC7? -Because with these two modes you have a complete encoder that supports both -opaque and transparent textures in a small amount (~1400 lines) of -understandable plain C code. Mode 6 excels on smooth blocks, and mode 1 is -strong with complex blocks, and a strong encoder that combines both modes can be -quite high quality. Fast mode 6-only encoders will have noticeable block -artifacts which this codec avoids by fully supporting mode 1. +This build has been tested with MSVC 2019 x64 and clang 6.0.0 under Ubuntu v18.04. -Modes 1 and 6 are typically the most used modes on many textures using other -encoders. Mode 1 has two subsets, 64 possible partitions, and 3-bit indices, -while mode 6 has large 4-bit indices and high precision 7777.1 endpoints. This -codec produces output that is far higher quality than any BC1 encoder, and -approaches (or in perceptual mode exceeds!) the quality of other full BC7 -encoders. 
+To compile with bc7e.ispc (on Linux this requires [Intel's ISPC compiler](https://ispc.github.io/downloads.html) to be in your path - recommended): -Why is bc7enc16 so fast in perceptual mode? -Computing error in YCbCr space is more expensive than in RGB space, yet bc7enc16 -in perceptual mode is stronger than ispc_texcomp (see the benchmark below) - -even without SSE/AVX vectorization and with only 2 modes to work with! +``` +cmake -D SUPPORT_BC7E=TRUE . +make +``` + +To compile without BC7E: + +``` +cmake . +make +``` -Most BC7 encoders only support linear RGB colorspace metrics, which is a -fundamental weakness. Some support weighted RGB metrics, which is better. With -linear RGB metrics, encoding error is roughly balanced between each channel, and -encoders have to work *very* hard (examining large amounts of RGB search space) -to get overall quality up. With perceptual colorspace metrics, RGB error tends -to become a bit unbalanced, with green quality favored more highly than red and -blue, and blue quality favored the least. A perceptual encoder is tuned to -prefer exploring solutions along the luma axis, where it's much less work to find -solutions with less luma error. bc7enc16 is, as far as I know, the first BC7 -codec to support computing error in weighted YCbCr colorspace. +Note the MSVC and Linux builds enable OpenMP for faster compression. -Note: Most of the timings here (except for the ispc_texcomp "fast" mode timings at the very bottom) -are for the *original* release, before I added several more optimizations. The latest version of -bc7enc16.c is around 8-27% faster than the initial release at same quality (when mode 1 is enabled - -there's no change with just mode 6). +### Examples -Some benchmarks across 31 images (kodim corpus+others): +The [.DDS](https://docs.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-pguide) output files can be loaded/viewed using tools like [AMD Compressonator](https://gpuopen.com/compressonator/). 
-Perceptual (average REC709 Luma PSNR - higher is better quality): +To encode to non-RDO BC7 using BC7E, highest quality, linear RGB(A) metrics: + +``` +./bc7enc blah.png ``` -iscp_texcomp slow vs. bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 355.4 secs 48.6 dB -bc7enc16: 122.6 secs 50.0 dB -iscp_texcomp slow vs. bc7enc16 uber0/max_partitions 64 -iscp_texcomp: 355.4 secs 48.6 dB -bc7enc16: 38.3 secs 49.6 dB +To encode to non-RDO BC7 using BC7E, highest quality, using perceptual (scaled YCbCr) colorspace error metrics: -iscp_texcomp basic vs. bc7enc16 uber0/max_partitions 16 -ispc_texcomp: 100.2 secs 48.3 dB -bc7enc16: 20.8 secs 49.3 dB +``` +./bc7enc blah.png -s +``` -iscp_texcomp fast vs. bc7enc16 uber0/max_partitions 16 -iscp_texcomp: 41.5 secs 48.0 dB -bc7enc16: 20.8 secs 49.3 dB +To encode to RDO BC7 using BC7E, highest quality, lambda=.5, linear metrics (perceptual colorspace metrics are always automatically disabled when -z is specified), with a balance of encoding performance vs. RDO efficiency: -iscp_texcomp ultrafast vs. bc7enc16 uber0/max_partitions 0 -iscp_texcomp: 1.9 secs 46.2 dB -bc7enc16: 8.9 secs 48.4 dB +``` +./bc7enc blah.png -z.5 +``` -Non-perceptual (average RGB PSNR): +To encode to RDO BC7 using BC7E, lower baseline quality (-u4) for faster encoding, lambda=.5, and with faster encoding (only inject one match vs two, with a tiny RDO lookback window size of 16 bytes): -iscp_texcomp slow vs. bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 355.4 secs 46.8 dB -bc7enc16: 51 secs 46.1 dB +``` +./bc7enc blah.png -u4 -z.5 -ze -zc16 +``` -iscp_texcomp slow vs. bc7enc16 uber0/max_partitions 64 -iscp_texcomp: 355.4 secs 46.8 dB -bc7enc16: 29.3 secs 45.8 dB +To encode to non-RDO BC7 using entropy reduced or quantized/weighted BC7 (no slowdown vs. non-RDO bc7enc.cpp for BC7, slightly reduced quality, but 5-10% better LZ compression, only uses 2 or 4 BC7 modes): -iscp_texcomp basic vs. 
bc7enc16 uber4/max_partitions 64 -iscp_texcomp: 99.9 secs 46.5 dB -bc7enc16: 51 secs 46.1 dB +``` +./bc7enc blah.png -C -e +``` -iscp_texcomp fast vs. bc7enc16 uber1/max_partitions 16 -ispc_texcomp: 41.5 secs 46.1 dB -bc7enc16: 19.8 secs 45.5 dB +To encode to RDO BC7 using the entropy reduction transform combined with reduced entropy BC7 encoding, with a slightly larger window size than the default which is 128 bytes: -iscp_texcomp fast vs. bc7enc16 uber0/max_partitions 8 -ispc_texcomp: 41.5 secs 46.1 dB -bc7enc16: 10.46 secs 44.4 dB +``` +./bc7enc -zc256 blah.png -C -e -z1.0 +``` -iscp_texcomp ultrafast vs. bc7enc16 uber0/max_partitions 0 -ispc_texcomp: 1.9 secs 42.7 dB -bc7enc16: 3.8 secs 42.7 dB +Same as before, but higher compression (allow 2 matches per block instead of 1): -DirectXTex CPU in "mode 6 only" mode vs. bc7enc16 uber1/max_partions 0 (mode 6 only), non-perceptual: - -DirectXTex: 466.4 secs 41.9 dB -bc7enc16: 6.7 secs 42.8 dB +``` +./bc7enc -zc256 blah.png -C -e -z1.0 -zn +``` -DirectXTex CPU in (default - no 3 subset modes) vs. bc7enc16 uber1/max_partions 64, non-perceptual: +Same, except disable ultra-smooth block handling: -DirectXTex: 9485.1 secs 45.6 dB -bc7enc16: 36 secs 46.0 dB -``` -(Note this version of DirectXTex has a key pbit bugfix which I've submitted but -is still waiting to be accepted. Non-bugfixed versions will be slightly lower -quality.) +``` +./bc7enc -zc256 blah.png -C -e -z1.0 -zu +``` -UPDATE: To illustrate how strong the mode 1+6 implementation is in bc7enc16, let's compare ispc_texcomp -fast vs. 
the latest version of bc7enc16 uber4/max_partitions 64: +To encode to RDO BC7 using the entropy reduction transform at lower quality, combined with reduced entropy BC7 encoding, with a slightly larger window size than the default which is 128 bytes: -Without filterbank optimizations: ``` - Time RGB PSNR Y PSNR -ispc_texcomp: 41.45 secs 46.09 dB 48.0 dB -bc7enc16: 41.42 secs 46.03 dB 48.2 dB +./bc7enc -zc256 blah.png -C -e -z2.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, without using reduced entropy BC7 encoding: -With filterbank optimizations enabled: -bc7enc16: 38.78 secs 45.94 dB 48.12 dB ``` -They both have virtually the same average RGB PSNR with these settings (.06 dB is basically noise), but -bc7enc16 is just as fast as ispc_texcomp fast, even though it's not vectorized. Interestingly, our Y PSNR is better, -although bc7enc16 wasn't using perceptual metrics in these benchmarks. +./bc7enc -zc1024 blah.png -z1.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, with a manually specified max smooth block max error scale: + +``` +./bc7enc -zc1024 blah.png -z2.0 -zb30.0 +``` + +To encode to RDO BC7 using the entropy reduction transform at higher effectiveness using a larger window size, using only mode 6 (more block artifacts, but better rate-distortion performance as measured by PSNR): + +``` +./bc7enc -zc1024 blah.png -6 -z1.0 -e +``` + +To encode to BC1: +``` +./bc7enc -1 blah.png +``` + +To encode to BC1 with Rate Distortion Optimization (RDO) at lambda=1.0: +``` +./bc7enc -1 -z1.0 blah.png +``` + +The -z option controls lambda, or the rate vs. distortion tradeoff. 0 = maximum quality, higher values=lower bitrates but lower quality. Try values [.25-8].
+ +To encode to BC1 with RDO, with RDO debug output, to monitor the percentage of blocks impacted: +``` +./bc7enc -1 -z1.0 -zd blah.png +``` + +To encode to BC1 with RDO with a higher than default smooth block scale factor: +``` +./bc7enc -1 -z1.0 -zb40.0 blah.png +``` + +Use -zb1.0 to disable smooth block error scaling completely, which increases RDO performance but can result in noticeable artifacts on smooth/flat blocks at higher lambdas. + +Use -zc# to control the RDO window size in bytes. Good values to try are 16-8192. +Use -zt to disable RDO multithreading. + +To encode to BC1 with RDO at the highest achievable quality/effectiveness (this is extremely slow): + +``` +./bc7enc -1 -z1.0 -zc32768 blah.png +``` + +This sets the window size to 32KB (the highest setting that makes sense for Deflate). Window sizes of 2KB (the default) to 8KB are way faster and in practice are almost as effective. The maximum window size setting supported by the command line tool is 64KB, but this would be very slow. + +For even higher quality per bit (this is incredibly slow): +``` +./bc7enc -1 -z1.0 -zc32768 -zm blah.png +``` + +### Dependencies +There are no 3rd party code or library dependencies. utils.cpp/.h is only needed by the example command line tool. It uses C++11. The individual .cpp files are designed to be easily dropped into other codebases. + +For RDO post-processing of any block-based format: ert.cpp/.h. You provide this function an array of encoded blocks, an array of source/original 32bpp blocks, some parameters, and a pointer to a block decoder function for your format as a callback. It must return false if the passed in block data is invalid. (Make sure you *really* validate the block's data, because the ERT post-processor will inevitably call your callback with invalid blocks.) This transform works on most other texture formats, such as ETC1/2, EAC, and ASTC. The ERT works on block sizes ranging from 1x1 to 12x12.
This file has no other dependencies apart from utils.cpp/h. + +For BC1-5 encoding/decoding: rgbcx.cpp/.h + +For BC7 encoding: bc7enc.cpp/.h + +For BC7 decoding: bc7decomp.cpp/.h -This was a multithreaded benchmark (using OpenMP) on a dual Xeon workstation. -ispc_texcomp was called with 64-blocks at a time and used AVX instructions. -Timings are for encoding only. diff --git a/libkram/bc7enc/bc7decomp.cpp b/libkram/bc7enc/bc7decomp.cpp index 3099ec4d..e5e8c696 100644 --- a/libkram/bc7enc/bc7decomp.cpp +++ b/libkram/bc7enc/bc7decomp.cpp @@ -1,9 +1,33 @@ // File: bc7decomp.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) #include "bc7decomp.h" +#include -namespace bc7decomp +#if (defined(_M_AMD64) || defined(_M_X64) || defined(__SSE2__)) +# define BC7DECOMP_USE_SSE2 +#endif + +#ifdef BC7DECOMP_USE_SSE2 +#include +#include +#endif + +namespace bc7decomp { +#ifdef BC7DECOMP_USE_SSE2 + const __m128i g_bc7_weights4_sse2[8] = + { + _mm_set_epi16(4, 4, 4, 4, 0, 0, 0, 0), + _mm_set_epi16(13, 13, 13, 13, 9, 9, 9, 9), + _mm_set_epi16(21, 21, 21, 21, 17, 17, 17, 17), + _mm_set_epi16(30, 30, 30, 30, 26, 26, 26, 26), + _mm_set_epi16(38, 38, 38, 38, 34, 34, 34, 34), + _mm_set_epi16(47, 47, 47, 47, 43, 43, 43, 43), + _mm_set_epi16(55, 55, 55, 55, 51, 51, 51, 51), + _mm_set_epi16(64, 64, 64, 64, 60, 60, 60, 60), + }; +#endif + const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; @@ -44,27 +68,32 @@ const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 }; -inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) +const uint8_t 
g_bc7_first_byte_to_mode[256] = { - assert(codesize <= 32); - uint32_t bits = 0; - uint32_t total_bits = 0; - - while (total_bits < codesize) - { - uint32_t byte_bit_offset = bit_offset & 7; - uint32_t bits_to_read = std::min(codesize - total_bits, 8 - byte_bit_offset); - - uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; - byte_bits &= ((1 << bits_to_read) - 1); - - bits |= (byte_bits << total_bits); + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, +}; - total_bits += bits_to_read; - bit_offset += bits_to_read; - } +inline void insert_weight_zero(uint64_t& index_bits, uint32_t bits_per_index, uint32_t offset) +{ + uint64_t LOW_BIT_MASK = (static_cast(1) << ((bits_per_index * (offset + 1)) - 1)) - 1; + uint64_t HIGH_BIT_MASK = ~LOW_BIT_MASK; - return bits; + index_bits = ((index_bits & HIGH_BIT_MASK) << 1) | (index_bits & LOW_BIT_MASK); } // BC7 mode 0-7 decompression. 
@@ -89,51 +118,144 @@ static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t b } return 0; } - -bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) + + +#ifdef BC7DECOMP_USE_SSE2 +static inline __m128i bc7_interp_sse2(__m128i l, __m128i h, __m128i w, __m128i iw) +{ + return _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(l, iw), _mm_mullo_epi16(h, w)), _mm_set1_epi16(32)), 6); +} + +static inline void bc7_interp2_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16 = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + + __m128i endpoints_16_swapped = _mm_shuffle_epi32(endpoints_16, _MM_SHUFFLE(1, 0, 3, 2)); + + // Interpolated colors will be color 1 and 2 + __m128i interpolated_colors = bc7_interp_sse2(endpoints_16, endpoints_16_swapped, _mm_set1_epi16(21), _mm_set1_epi16(43)); + + // all_colors will be 1, 2, 0, 3 + __m128i all_colors = _mm_packus_epi16(interpolated_colors, endpoints_16); + + all_colors = _mm_shuffle_epi32(all_colors, _MM_SHUFFLE(3, 1, 0, 2)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors); +} + +static inline void bc7_interp3_sse2(const color_rgba* endpoint_pair, color_rgba* out_colors) +{ + __m128i endpoints = _mm_loadu_si64(endpoint_pair); + __m128i endpoints_16bit = _mm_unpacklo_epi8(endpoints, _mm_setzero_si128()); + __m128i endpoints_16bit_swapped = _mm_shuffle_epi32(endpoints_16bit, _MM_SHUFFLE(1, 0, 3, 2)); + + __m128i interpolated_16 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set1_epi16(9), _mm_set1_epi16(55)); + __m128i interpolated_23 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(37, 37, 37, 37, 18, 18, 18, 18), _mm_set_epi16(27, 27, 27, 27, 46, 46, 46, 46)); + __m128i interpolated_45 = bc7_interp_sse2(endpoints_16bit, endpoints_16bit_swapped, _mm_set_epi16(18, 18, 18, 18, 37, 37, 37, 37), _mm_set_epi16(46, 
46, 46, 46, 27, 27, 27, 27)); + + __m128i interpolated_01 = _mm_unpacklo_epi64(endpoints_16bit, interpolated_16); + __m128i interpolated_67 = _mm_unpackhi_epi64(interpolated_16, endpoints_16bit); + + __m128i all_colors_0 = _mm_packus_epi16(interpolated_01, interpolated_23); + __m128i all_colors_1 = _mm_packus_epi16(interpolated_45, interpolated_67); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors), all_colors_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out_colors + 4), all_colors_1); +} +#endif + +bool unpack_bc7_mode0_2(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { //const uint32_t SUBSETS = 3; const uint32_t ENDPOINTS = 6; const uint32_t COMPS = 3; const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t PBITS = (mode == 0) ? 6 : 0; +#ifndef BC7DECOMP_USE_SSE2 const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); +#endif + const uint32_t PART_BITS = (mode == 0) ? 4 : 6; + const uint32_t PART_MASK = (1 << PART_BITS) - 1; - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 
4 : 6); + const uint32_t part = (low_chunk >> (mode + 1)) & PART_MASK; + + uint64_t channel_read_chunks[3] = { 0, 0, 0 }; + + if (mode == 0) + { + channel_read_chunks[0] = low_chunk >> 5; + channel_read_chunks[1] = low_chunk >> 29; + channel_read_chunks[2] = ((low_chunk >> 53) | (high_chunk << 11)); + } + else + { + channel_read_chunks[0] = low_chunk >> 9; + channel_read_chunks[1] = ((low_chunk >> 39) | (high_chunk << 25)); + channel_read_chunks[2] = high_chunk >> 5; + } color_rgba endpoints[ENDPOINTS]; for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } uint32_t pbits[6]; - for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); + if (mode == 0) + { + uint8_t p_bits_chunk = static_cast((high_chunk >> 13) & 0xff); + + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = (p_bits_chunk >> p) & 1; + } + + uint64_t weights_read_chunk = high_chunk >> (67 - 16 * WEIGHT_BITS); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, 0); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::min(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); + insert_weight_zero(weights_read_chunk, WEIGHT_BITS, std::max(g_bc7_table_anchor_index_third_subset_1[part], g_bc7_table_anchor_index_third_subset_2[part])); uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_third_subset_1[part]) || (i == g_bc7_table_anchor_index_third_subset_2[part])) ? 
(WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); + { + weights[i] = static_cast(weights_read_chunk & WEIGHT_MASK); + weights_read_chunk >>= WEIGHT_BITS; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + endpoints[e][c] = static_cast((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); color_rgba block_colors[3][8]; + +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 3; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else for (uint32_t s = 0; s < 3; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) { for (uint32_t c = 0; c < 3; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); block_colors[s][i][3] = 255; } +#endif for (uint32_t i = 0; i < 16; i++) pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; @@ -141,51 +263,104 @@ bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPix return true; } -bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +bool unpack_bc7_mode1_3_7(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { //const uint32_t SUBSETS = 2; const uint32_t ENDPOINTS = 4; const uint32_t COMPS = (mode == 7) ? 4 : 3; const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t PBITS = (mode == 1) ? 
2 : 4; const uint32_t SHARED_PBITS = (mode == 1) ? true : false; +#ifndef BC7DECOMP_USE_SSE2 const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); - - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; +#endif + + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - const uint32_t part = read_bits32(pBuf, bit_offset, 6); + const uint32_t part = ((low_chunk >> (mode + 1)) & 0x3f); color_rgba endpoints[ENDPOINTS]; + + uint64_t channel_read_chunks[4] = { 0, 0, 0, 0 }; + uint64_t p_read_chunk = 0; + channel_read_chunks[0] = (low_chunk >> (mode + 7)); + uint64_t weight_read_chunk; + + switch (mode) + { + case 1: + channel_read_chunks[1] = (low_chunk >> 32); + channel_read_chunks[2] = ((low_chunk >> 56) | (high_chunk << 8)); + p_read_chunk = high_chunk >> 16; + weight_read_chunk = high_chunk >> 18; + break; + case 3: + channel_read_chunks[1] = ((low_chunk >> 38) | (high_chunk << 26)); + channel_read_chunks[2] = high_chunk >> 2; + p_read_chunk = high_chunk >> 30; + weight_read_chunk = high_chunk >> 34; + break; + case 7: + channel_read_chunks[1] = low_chunk >> 34; + channel_read_chunks[2] = ((low_chunk >> 54) | (high_chunk << 10)); + channel_read_chunks[3] = high_chunk >> 10; + p_read_chunk = (high_chunk >> 30); + weight_read_chunk = (high_chunk >> 34); + break; + default: + return false; + }; + for (uint32_t c = 0; c < COMPS; c++) + { + uint64_t channel_read_chunk = channel_read_chunks[c]; for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + { + endpoints[e][c] = static_cast(channel_read_chunk & ENDPOINT_MASK); + channel_read_chunk >>= ENDPOINT_BITS; + } + } uint32_t pbits[4]; for (uint32_t p = 0; p < PBITS; p++) - pbits[p] = read_bits32(pBuf, bit_offset, 1); - + pbits[p] = (p_read_chunk >> p) & 1; + + insert_weight_zero(weight_read_chunk, WEIGHT_BITS, 0); + 
insert_weight_zero(weight_read_chunk, WEIGHT_BITS, g_bc7_table_anchor_index_second_subset[part]); + uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) - weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - - assert(bit_offset == 128); + { + weights[i] = static_cast(weight_read_chunk & WEIGHT_MASK); + weight_read_chunk >>= WEIGHT_BITS; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + endpoints[e][c] = static_cast((mode != 7U && c == 3U) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); color_rgba block_colors[2][8]; +#ifdef BC7DECOMP_USE_SSE2 + for (uint32_t s = 0; s < 2; s++) + { + if (WEIGHT_BITS == 2) + bc7_interp2_sse2(endpoints + s * 2, block_colors[s]); + else + bc7_interp3_sse2(endpoints + s * 2, block_colors[s]); + } +#else for (uint32_t s = 0; s < 2; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) { for (uint32_t c = 0; c < COMPS; c++) - block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][c] = static_cast(bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS)); block_colors[s][i][3] = (COMPS == 3) ? 
255 : block_colors[s][i][3]; } +#endif for (uint32_t i = 0; i < 16; i++) pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; @@ -193,53 +368,101 @@ bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pP return true; } -bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +bool unpack_bc7_mode4_5(uint32_t mode, const uint64_t* data_chunks, color_rgba* pPixels) { const uint32_t ENDPOINTS = 2; - const uint32_t COMPS = 4; + //const uint32_t COMPS = 4; const uint32_t WEIGHT_BITS = 2; + const uint32_t WEIGHT_MASK = (1 << WEIGHT_BITS) - 1; const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t A_WEIGHT_MASK = (1 << A_WEIGHT_BITS) - 1; const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t ENDPOINT_MASK = (1 << ENDPOINT_BITS) - 1; const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 6 : 8; + const uint32_t A_ENDPOINT_MASK = (1 << A_ENDPOINT_BITS) - 1; //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; - uint32_t bit_offset = 0; - const uint8_t* pBuf = static_cast(pBlock_bits); + const uint64_t low_chunk = data_chunks[0]; + const uint64_t high_chunk = data_chunks[1]; - if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + const uint32_t comp_rot = (low_chunk >> (mode + 1)) & 0x3; + const uint32_t index_mode = (mode == 4) ? static_cast((low_chunk >> 7) & 1) : 0; - const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); - const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + uint64_t color_read_bits = low_chunk >> 8; color_rgba endpoints[ENDPOINTS]; - for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t c = 0; c < 3; c++) + { for (uint32_t e = 0; e < ENDPOINTS; e++) - endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); - + { + endpoints[e][c] = static_cast(color_read_bits & ENDPOINT_MASK); + color_read_bits >>= ENDPOINT_BITS; + } + } + + endpoints[0][3] = static_cast(color_read_bits & ENDPOINT_MASK); + + uint64_t rgb_weights_chunk; + uint64_t a_weights_chunk; + if (mode == 4) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast((color_read_bits >> A_ENDPOINT_BITS) & A_ENDPOINT_MASK); + rgb_weights_chunk = ((low_chunk >> 50) | (high_chunk << 14)); + a_weights_chunk = high_chunk >> 17; + } + else if (mode == 5) + { + endpoints[0][3] = static_cast(color_read_bits & A_ENDPOINT_MASK); + endpoints[1][3] = static_cast(((low_chunk >> 58) | (high_chunk << 6)) & A_ENDPOINT_MASK); + rgb_weights_chunk = high_chunk >> 2; + a_weights_chunk = high_chunk >> 33; + } + else + return false; + + insert_weight_zero(rgb_weights_chunk, WEIGHT_BITS, 0); + insert_weight_zero(a_weights_chunk, A_WEIGHT_BITS, 0); + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; - + const uint32_t weight_mask[2] = { index_mode ? A_WEIGHT_MASK : WEIGHT_MASK, index_mode ? WEIGHT_MASK : A_WEIGHT_MASK }; + uint32_t weights[16], a_weights[16]; - - for (uint32_t i = 0; i < 16; i++) - (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + if (index_mode) + std::swap(rgb_weights_chunk, a_weights_chunk); for (uint32_t i = 0; i < 16; i++) - (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 
1 : 0)); + { + weights[i] = (rgb_weights_chunk & weight_mask[0]); + rgb_weights_chunk >>= weight_bits[0]; + } - assert(bit_offset == 128); + for (uint32_t i = 0; i < 16; i++) + { + a_weights[i] = (a_weights_chunk & weight_mask[1]); + a_weights_chunk >>= weight_bits[1]; + } for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) - endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + endpoints[e][c] = static_cast(bc7_dequant(endpoints[e][c], (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS)); color_rgba block_colors[8]; +#ifdef BC7DECOMP_USE_SSE2 + if (weight_bits[0] == 3) + bc7_interp3_sse2(endpoints, block_colors); + else + bc7_interp2_sse2(endpoints, block_colors); +#else for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) for (uint32_t c = 0; c < 3; c++) - block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + block_colors[i][c] = static_cast(bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0])); +#endif for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) - block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + block_colors[i][3] = static_cast(bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1])); for (uint32_t i = 0; i < 16; i++) { @@ -308,26 +531,46 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) if (block.m_lo.m_mode != (1 << 6)) return false; - const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); - const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); - const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); - const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); - const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); - const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); - const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); - 
const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + const uint32_t r0 = static_cast((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = static_cast((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = static_cast((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = static_cast((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = static_cast((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + const uint32_t g1 = static_cast((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = static_cast((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = static_cast((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); color_rgba vals[16]; +#ifdef BC7DECOMP_USE_SSE2 + __m128i vep0 = _mm_set_epi16((short)a0, (short)b0, (short)g0, (short)r0, (short)a0, (short)b0, (short)g0, (short)r0); + __m128i vep1 = _mm_set_epi16((short)a1, (short)b1, (short)g1, (short)r1, (short)a1, (short)b1, (short)g1, (short)r1); + + for (uint32_t i = 0; i < 16; i += 4) + { + const __m128i w0 = g_bc7_weights4_sse2[i / 4 * 2 + 0]; + const __m128i w1 = g_bc7_weights4_sse2[i / 4 * 2 + 1]; + + const __m128i iw0 = _mm_sub_epi16(_mm_set1_epi16(64), w0); + const __m128i iw1 = _mm_sub_epi16(_mm_set1_epi16(64), w1); + + __m128i first_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw0), _mm_mullo_epi16(vep1, w0)), _mm_set1_epi16(32)), 6); + __m128i second_half = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(vep0, iw1), _mm_mullo_epi16(vep1, w1)), _mm_set1_epi16(32)), 6); + __m128i combined = _mm_packus_epi16(first_half, second_half); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(vals + i), combined); + } +#else for (uint32_t i = 0; i < 16; i++) { const uint32_t w = g_bc7_weights4[i]; const uint32_t iw = 64 - w; - vals[i].set_noclamp_rgba( - (r0 * iw + r1 * w + 32) >> 6, - (g0 * iw + g1 * w + 32) >> 6, - (b0 * iw + b1 * w + 32) >> 6, + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw 
+ g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, (a0 * iw + a1 * w + 32) >> 6); } +#endif pPixels[0] = vals[block.m_hi.m_s00]; pPixels[1] = vals[block.m_hi.m_s10]; @@ -338,7 +581,7 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) pPixels[5] = vals[block.m_hi.m_s11]; pPixels[6] = vals[block.m_hi.m_s21]; pPixels[7] = vals[block.m_hi.m_s31]; - + pPixels[8] = vals[block.m_hi.m_s02]; pPixels[9] = vals[block.m_hi.m_s12]; pPixels[10] = vals[block.m_hi.m_s22]; @@ -354,32 +597,43 @@ bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) bool unpack_bc7(const void *pBlock, color_rgba *pPixels) { - const uint32_t first_byte = static_cast(pBlock)[0]; + const uint8_t *block_bytes = static_cast(pBlock); + uint8_t mode = g_bc7_first_byte_to_mode[block_bytes[0]]; - for (uint32_t mode = 0; mode <= 7; mode++) + uint64_t data_chunks[2]; + + uint64_t endian_check = 1; + if (*reinterpret_cast(&endian_check) == 1) + memcpy(data_chunks, pBlock, 16); + else { - if (first_byte & (1U << mode)) + data_chunks[0] = data_chunks[1] = 0; + for (int chunk_index = 0; chunk_index < 2; chunk_index++) { - switch (mode) - { - case 0: - case 2: - return unpack_bc7_mode0_2(mode, pBlock, pPixels); - case 1: - case 3: - case 7: - return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); - case 4: - case 5: - return unpack_bc7_mode4_5(mode, pBlock, pPixels); - case 6: - return unpack_bc7_mode6(pBlock, pPixels); - default: - break; - } + for (int byte_index = 0; byte_index < 8; byte_index++) + data_chunks[chunk_index] |= static_cast(block_bytes[chunk_index * 8 + byte_index]) << (byte_index * 8); } } + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, data_chunks, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, data_chunks, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, data_chunks, pPixels); + case 6: + return unpack_bc7_mode6(data_chunks, pPixels); + default: + memset(pPixels, 0, sizeof(color_rgba) * 16); + 
break; + } + return false; } diff --git a/libkram/bc7enc/bc7decomp.h b/libkram/bc7enc/bc7decomp.h index cccdf50e..37822fef 100644 --- a/libkram/bc7enc/bc7decomp.h +++ b/libkram/bc7enc/bc7decomp.h @@ -1,5 +1,10 @@ #pragma once +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // nonstandard extension used: nameless struct/union +#endif + #include #include //#include @@ -163,3 +168,13 @@ class color_rgba bool unpack_bc7(const void *pBlock, color_rgba *pPixels); } // namespace bc7decomp + +namespace bc7decomp_ref +{ + bool unpack_bc7(const void* pBlock, bc7decomp::color_rgba* pPixels); +} // namespace bc7decomp_ref + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + diff --git a/libkram/bc7enc/bc7decomp_ref.cpp b/libkram/bc7enc/bc7decomp_ref.cpp new file mode 100644 index 00000000..8a69e947 --- /dev/null +++ b/libkram/bc7enc/bc7decomp_ref.cpp @@ -0,0 +1,431 @@ +// File: bc7decomp.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) +#include "bc7decomp.h" + +using namespace bc7decomp; + +namespace bc7decomp_ref +{ + +const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; +const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; +const uint32_t g_bc7_weights4[16] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + +const uint8_t g_bc7_partition2[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,0,1,1,0,0,1,1, 0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1, 0,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1, 0,0,0,1,0,0,1,1,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,0,1,1,1,1,1,1,1, 0,0,0,1,0,0,1,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,0,1,1,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1, 0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, 0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1, + 0,0,0,0,1,0,0,0,1,1,1,0,1,1,1,1, 0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0, 
0,1,1,1,0,0,1,1,0,0,0,1,0,0,0,0, 0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,0, 0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1, + 0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0, 0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0, 0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0, 0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0, 0,0,0,1,0,1,1,1,1,1,1,0,1,0,0,0, 0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0, 0,1,1,1,0,0,0,1,1,0,0,0,1,1,1,0, 0,0,1,1,1,0,0,1,1,0,0,1,1,1,0,0, + 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1, 0,0,0,0,1,1,1,1,0,0,0,0,1,1,1,1, 0,1,0,1,1,0,1,0,0,1,0,1,1,0,1,0, 0,0,1,1,0,0,1,1,1,1,0,0,1,1,0,0, 0,0,1,1,1,1,0,0,0,0,1,1,1,1,0,0, 0,1,0,1,0,1,0,1,1,0,1,0,1,0,1,0, 0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1, 0,1,0,1,1,0,1,0,1,0,1,0,0,1,0,1, + 0,1,1,1,0,0,1,1,1,1,0,0,1,1,1,0, 0,0,0,1,0,0,1,1,1,1,0,0,1,0,0,0, 0,0,1,1,0,0,1,0,0,1,0,0,1,1,0,0, 0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0, 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, 0,0,1,1,1,1,0,0,1,1,0,0,0,0,1,1, 0,1,1,0,0,1,1,0,1,0,0,1,1,0,0,1, 0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0, + 0,1,0,0,1,1,1,0,0,1,0,0,0,0,0,0, 0,0,1,0,0,1,1,1,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,1,0,0,1,1,1,0,0,1,0, 0,0,0,0,0,1,0,0,1,1,1,0,0,1,0,0, 0,1,1,0,1,1,0,0,1,0,0,1,0,0,1,1, 0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,1,0,0,1,1,1,0,0, 0,0,1,1,1,0,0,1,1,1,0,0,0,1,1,0, + 0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,1, 0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1, 0,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1, 0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1, 0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1, 0,0,1,1,0,0,1,1,1,1,1,1,0,0,0,0, 0,0,1,0,0,0,1,0,1,1,1,0,1,1,1,0, 0,1,0,0,0,1,0,0,0,1,1,1,0,1,1,1 +}; + +const uint8_t g_bc7_partition3[64 * 16] = +{ + 0,0,1,1,0,0,1,1,0,2,2,1,2,2,2,2, 0,0,0,1,0,0,1,1,2,2,1,1,2,2,2,1, 0,0,0,0,2,0,0,1,2,2,1,1,2,2,1,1, 0,2,2,2,0,0,2,2,0,0,1,1,0,1,1,1, 0,0,0,0,0,0,0,0,1,1,2,2,1,1,2,2, 0,0,1,1,0,0,1,1,0,0,2,2,0,0,2,2, 0,0,2,2,0,0,2,2,1,1,1,1,1,1,1,1, 0,0,1,1,0,0,1,1,2,2,1,1,2,2,1,1, + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, 0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2, 0,0,1,2,0,0,1,2,0,0,1,2,0,0,1,2, 0,1,1,2,0,1,1,2,0,1,1,2,0,1,1,2, 
0,1,2,2,0,1,2,2,0,1,2,2,0,1,2,2, 0,0,1,1,0,1,1,2,1,1,2,2,1,2,2,2, 0,0,1,1,2,0,0,1,2,2,0,0,2,2,2,0, + 0,0,0,1,0,0,1,1,0,1,1,2,1,1,2,2, 0,1,1,1,0,0,1,1,2,0,0,1,2,2,0,0, 0,0,0,0,1,1,2,2,1,1,2,2,1,1,2,2, 0,0,2,2,0,0,2,2,0,0,2,2,1,1,1,1, 0,1,1,1,0,1,1,1,0,2,2,2,0,2,2,2, 0,0,0,1,0,0,0,1,2,2,2,1,2,2,2,1, 0,0,0,0,0,0,1,1,0,1,2,2,0,1,2,2, 0,0,0,0,1,1,0,0,2,2,1,0,2,2,1,0, + 0,1,2,2,0,1,2,2,0,0,1,1,0,0,0,0, 0,0,1,2,0,0,1,2,1,1,2,2,2,2,2,2, 0,1,1,0,1,2,2,1,1,2,2,1,0,1,1,0, 0,0,0,0,0,1,1,0,1,2,2,1,1,2,2,1, 0,0,2,2,1,1,0,2,1,1,0,2,0,0,2,2, 0,1,1,0,0,1,1,0,2,0,0,2,2,2,2,2, 0,0,1,1,0,1,2,2,0,1,2,2,0,0,1,1, 0,0,0,0,2,0,0,0,2,2,1,1,2,2,2,1, + 0,0,0,0,0,0,0,2,1,1,2,2,1,2,2,2, 0,2,2,2,0,0,2,2,0,0,1,2,0,0,1,1, 0,0,1,1,0,0,1,2,0,0,2,2,0,2,2,2, 0,1,2,0,0,1,2,0,0,1,2,0,0,1,2,0, 0,0,0,0,1,1,1,1,2,2,2,2,0,0,0,0, 0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0, 0,1,2,0,2,0,1,2,1,2,0,1,0,1,2,0, 0,0,1,1,2,2,0,0,1,1,2,2,0,0,1,1, + 0,0,1,1,1,1,2,2,2,2,0,0,0,0,1,1, 0,1,0,1,0,1,0,1,2,2,2,2,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,2,1,2,1,2,1, 0,0,2,2,1,1,2,2,0,0,2,2,1,1,2,2, 0,0,2,2,0,0,1,1,0,0,2,2,0,0,1,1, 0,2,2,0,1,2,2,1,0,2,2,0,1,2,2,1, 0,1,0,1,2,2,2,2,2,2,2,2,0,1,0,1, 0,0,0,0,2,1,2,1,2,1,2,1,2,1,2,1, + 0,1,0,1,0,1,0,1,0,1,0,1,2,2,2,2, 0,2,2,2,0,1,1,1,0,2,2,2,0,1,1,1, 0,0,0,2,1,1,1,2,0,0,0,2,1,1,1,2, 0,0,0,0,2,1,1,2,2,1,1,2,2,1,1,2, 0,2,2,2,0,1,1,1,0,1,1,1,0,2,2,2, 0,0,0,2,1,1,1,2,1,1,1,2,0,0,0,2, 0,1,1,0,0,1,1,0,0,1,1,0,2,2,2,2, 0,0,0,0,0,0,0,0,2,1,1,2,2,1,1,2, + 0,1,1,0,0,1,1,0,2,2,2,2,2,2,2,2, 0,0,2,2,0,0,1,1,0,0,1,1,0,0,2,2, 0,0,2,2,1,1,2,2,1,1,2,2,0,0,2,2, 0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,2, 0,0,0,2,0,0,0,1,0,0,0,2,0,0,0,1, 0,2,2,2,1,2,2,2,0,2,2,2,1,2,2,2, 0,1,0,1,2,2,2,2,2,2,2,2,2,2,2,2, 0,1,1,1,2,0,1,1,2,2,0,1,2,2,2,0, +}; + +const uint8_t g_bc7_table_anchor_index_second_subset[64] = { 15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15, 15, 2, 8, 2, 2, 8, 8,15, 2, 8, 2, 2, 8, 8, 2, 2, 15,15, 6, 8, 2, 8,15,15, 2, 8, 2, 2, 2,15,15, 6, 6, 2, 6, 8,15,15, 2, 2, 15,15,15,15,15, 2, 2,15 }; + +const uint8_t 
g_bc7_table_anchor_index_third_subset_1[64] = +{ + 3, 3,15,15, 8, 3,15,15, 8, 8, 6, 6, 6, 5, 3, 3, 3, 3, 8,15, 3, 3, 6,10, 5, 8, 8, 6, 8, 5,15,15, 8,15, 3, 5, 6,10, 8,15, 15, 3,15, 5,15,15,15,15, 3,15, 5, 5, 5, 8, 5,10, 5,10, 8,13,15,12, 3, 3 +}; + +const uint8_t g_bc7_table_anchor_index_third_subset_2[64] = +{ + 15, 8, 8, 3,15,15, 3, 8, 15,15,15,15,15,15,15, 8, 15, 8,15, 3,15, 8,15, 8, 3,15, 6,10,15,15,10, 8, 15, 3,15,10,10, 8, 9,10, 6,15, 8,15, 3, 6, 6, 8, 15, 3,15,15,15,15,15,15, 15,15,15,15, 3,15,15, 8 +}; + +inline uint32_t read_bits32(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize) +{ + assert(codesize <= 32); + uint32_t bits = 0; + uint32_t total_bits = 0; + + while (total_bits < codesize) + { + uint32_t byte_bit_offset = bit_offset & 7; + uint32_t bits_to_read = std::min(codesize - total_bits, 8 - byte_bit_offset); + + uint32_t byte_bits = pBuf[bit_offset >> 3] >> byte_bit_offset; + byte_bits &= ((1 << bits_to_read) - 1); + + bits |= (byte_bits << total_bits); + + total_bits += bits_to_read; + bit_offset += bits_to_read; + } + + return bits; +} + +// BC7 mode 0-7 decompression. +// Instead of one monster routine to unpack all the BC7 modes, we're lumping the 3 subset, 2 subset, 1 subset, and dual plane modes together into simple shared routines. 
+ +static inline uint32_t bc7_dequant(uint32_t val, uint32_t pbit, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(pbit < 2); assert(val_bits >= 4 && val_bits <= 8); const uint32_t total_bits = val_bits + 1; val = (val << 1) | pbit; val <<= (8 - total_bits); val |= (val >> total_bits); assert(val <= 255); return val; } +static inline uint32_t bc7_dequant(uint32_t val, uint32_t val_bits) { assert(val < (1U << val_bits)); assert(val_bits >= 4 && val_bits <= 8); val <<= (8 - val_bits); val |= (val >> val_bits); assert(val <= 255); return val; } + +static inline uint32_t bc7_interp2(uint32_t l, uint32_t h, uint32_t w) { assert(w < 4); return (l * (64 - g_bc7_weights2[w]) + h * g_bc7_weights2[w] + 32) >> 6; } +static inline uint32_t bc7_interp3(uint32_t l, uint32_t h, uint32_t w) { assert(w < 8); return (l * (64 - g_bc7_weights3[w]) + h * g_bc7_weights3[w] + 32) >> 6; } +static inline uint32_t bc7_interp4(uint32_t l, uint32_t h, uint32_t w) { assert(w < 16); return (l * (64 - g_bc7_weights4[w]) + h * g_bc7_weights4[w] + 32) >> 6; } +static inline uint32_t bc7_interp(uint32_t l, uint32_t h, uint32_t w, uint32_t bits) +{ + assert(l <= 255 && h <= 255); + switch (bits) + { + case 2: return bc7_interp2(l, h, w); + case 3: return bc7_interp3(l, h, w); + case 4: return bc7_interp4(l, h, w); + default: + break; + } + return 0; +} + +bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 3; + const uint32_t ENDPOINTS = 6; + const uint32_t COMPS = 3; + const uint32_t WEIGHT_BITS = (mode == 0) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; + const uint32_t PBITS = (mode == 0) ? 6 : 0; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, (mode == 0) ? 
4 : 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[6]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_third_subset_1[part]) || (i == g_bc7_table_anchor_index_third_subset_2[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == 3) ? 255 : (PBITS ? bc7_dequant(endpoints[e][c], pbits[e], ENDPOINT_BITS) : bc7_dequant(endpoints[e][c], ENDPOINT_BITS))); + + color_rgba block_colors[3][8]; + for (uint32_t s = 0; s < 3; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < 3; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = 255; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition3[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode1_3_7(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + //const uint32_t SUBSETS = 2; + const uint32_t ENDPOINTS = 4; + const uint32_t COMPS = (mode == 7) ? 4 : 3; + const uint32_t WEIGHT_BITS = (mode == 1) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 7) ? 5 : ((mode == 1) ? 6 : 7); + const uint32_t PBITS = (mode == 1) ? 2 : 4; + const uint32_t SHARED_PBITS = (mode == 1) ? 
true : false; + const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t part = read_bits32(pBuf, bit_offset, 6); + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); + + uint32_t pbits[4]; + for (uint32_t p = 0; p < PBITS; p++) + pbits[p] = read_bits32(pBuf, bit_offset, 1); + + uint32_t weights[16]; + for (uint32_t i = 0; i < 16; i++) + weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); + + color_rgba block_colors[2][8]; + for (uint32_t s = 0; s < 2; s++) + for (uint32_t i = 0; i < WEIGHT_VALS; i++) + { + for (uint32_t c = 0; c < COMPS; c++) + block_colors[s][i][c] = (uint8_t)bc7_interp(endpoints[s * 2 + 0][c], endpoints[s * 2 + 1][c], i, WEIGHT_BITS); + block_colors[s][i][3] = (COMPS == 3) ? 255 : block_colors[s][i][3]; + } + + for (uint32_t i = 0; i < 16; i++) + pPixels[i] = block_colors[g_bc7_partition2[part * 16 + i]][weights[i]]; + + return true; +} + +bool unpack_bc7_mode4_5(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) +{ + const uint32_t ENDPOINTS = 2; + const uint32_t COMPS = 4; + const uint32_t WEIGHT_BITS = 2; + const uint32_t A_WEIGHT_BITS = (mode == 4) ? 3 : 2; + const uint32_t ENDPOINT_BITS = (mode == 4) ? 5 : 7; + const uint32_t A_ENDPOINT_BITS = (mode == 4) ? 
6 : 8; + //const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; + //const uint32_t A_WEIGHT_VALS = 1 << A_WEIGHT_BITS; + + uint32_t bit_offset = 0; + const uint8_t* pBuf = static_cast(pBlock_bits); + + if (read_bits32(pBuf, bit_offset, mode + 1) != (1U << mode)) return false; + + const uint32_t comp_rot = read_bits32(pBuf, bit_offset, 2); + const uint32_t index_mode = (mode == 4) ? read_bits32(pBuf, bit_offset, 1) : 0; + + color_rgba endpoints[ENDPOINTS]; + for (uint32_t c = 0; c < COMPS; c++) + for (uint32_t e = 0; e < ENDPOINTS; e++) + endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); + + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; + + uint32_t weights[16], a_weights[16]; + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 1 : 0)); + + for (uint32_t i = 0; i < 16; i++) + (index_mode ? weights : a_weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[1 - index_mode] - ((!i) ? 1 : 0)); + + assert(bit_offset == 128); + + for (uint32_t e = 0; e < ENDPOINTS; e++) + for (uint32_t c = 0; c < 4; c++) + endpoints[e][c] = (uint8_t)bc7_dequant(endpoints[e][c], (c == 3) ? 
A_ENDPOINT_BITS : ENDPOINT_BITS); + + color_rgba block_colors[8]; + for (uint32_t i = 0; i < (1U << weight_bits[0]); i++) + for (uint32_t c = 0; c < 3; c++) + block_colors[i][c] = (uint8_t)bc7_interp(endpoints[0][c], endpoints[1][c], i, weight_bits[0]); + + for (uint32_t i = 0; i < (1U << weight_bits[1]); i++) + block_colors[i][3] = (uint8_t)bc7_interp(endpoints[0][3], endpoints[1][3], i, weight_bits[1]); + + for (uint32_t i = 0; i < 16; i++) + { + pPixels[i] = block_colors[weights[i]]; + pPixels[i].a = block_colors[a_weights[i]].a; + if (comp_rot >= 1) + std::swap(pPixels[i].a, pPixels[i].m_comps[comp_rot - 1]); + } + + return true; +} + +struct bc7_mode_6 +{ + struct + { + uint64_t m_mode : 7; + uint64_t m_r0 : 7; + uint64_t m_r1 : 7; + uint64_t m_g0 : 7; + uint64_t m_g1 : 7; + uint64_t m_b0 : 7; + uint64_t m_b1 : 7; + uint64_t m_a0 : 7; + uint64_t m_a1 : 7; + uint64_t m_p0 : 1; + } m_lo; + + union + { + struct + { + uint64_t m_p1 : 1; + uint64_t m_s00 : 3; + uint64_t m_s10 : 4; + uint64_t m_s20 : 4; + uint64_t m_s30 : 4; + + uint64_t m_s01 : 4; + uint64_t m_s11 : 4; + uint64_t m_s21 : 4; + uint64_t m_s31 : 4; + + uint64_t m_s02 : 4; + uint64_t m_s12 : 4; + uint64_t m_s22 : 4; + uint64_t m_s32 : 4; + + uint64_t m_s03 : 4; + uint64_t m_s13 : 4; + uint64_t m_s23 : 4; + uint64_t m_s33 : 4; + + } m_hi; + + uint64_t m_hi_bits; + }; +}; + +bool unpack_bc7_mode6(const void *pBlock_bits, color_rgba *pPixels) +{ + static_assert(sizeof(bc7_mode_6) == 16, "sizeof(bc7_mode_6) == 16"); + + const bc7_mode_6 &block = *static_cast(pBlock_bits); + + if (block.m_lo.m_mode != (1 << 6)) + return false; + + const uint32_t r0 = (uint32_t)((block.m_lo.m_r0 << 1) | block.m_lo.m_p0); + const uint32_t g0 = (uint32_t)((block.m_lo.m_g0 << 1) | block.m_lo.m_p0); + const uint32_t b0 = (uint32_t)((block.m_lo.m_b0 << 1) | block.m_lo.m_p0); + const uint32_t a0 = (uint32_t)((block.m_lo.m_a0 << 1) | block.m_lo.m_p0); + const uint32_t r1 = (uint32_t)((block.m_lo.m_r1 << 1) | block.m_hi.m_p1); + 
const uint32_t g1 = (uint32_t)((block.m_lo.m_g1 << 1) | block.m_hi.m_p1); + const uint32_t b1 = (uint32_t)((block.m_lo.m_b1 << 1) | block.m_hi.m_p1); + const uint32_t a1 = (uint32_t)((block.m_lo.m_a1 << 1) | block.m_hi.m_p1); + + color_rgba vals[16]; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t w = g_bc7_weights4[i]; + const uint32_t iw = 64 - w; + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, + (a0 * iw + a1 * w + 32) >> 6); + } + + pPixels[0] = vals[block.m_hi.m_s00]; + pPixels[1] = vals[block.m_hi.m_s10]; + pPixels[2] = vals[block.m_hi.m_s20]; + pPixels[3] = vals[block.m_hi.m_s30]; + + pPixels[4] = vals[block.m_hi.m_s01]; + pPixels[5] = vals[block.m_hi.m_s11]; + pPixels[6] = vals[block.m_hi.m_s21]; + pPixels[7] = vals[block.m_hi.m_s31]; + + pPixels[8] = vals[block.m_hi.m_s02]; + pPixels[9] = vals[block.m_hi.m_s12]; + pPixels[10] = vals[block.m_hi.m_s22]; + pPixels[11] = vals[block.m_hi.m_s32]; + + pPixels[12] = vals[block.m_hi.m_s03]; + pPixels[13] = vals[block.m_hi.m_s13]; + pPixels[14] = vals[block.m_hi.m_s23]; + pPixels[15] = vals[block.m_hi.m_s33]; + + return true; +} + +bool unpack_bc7(const void *pBlock, bc7decomp::color_rgba *pPixels) +{ + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + if (first_byte & (1U << mode)) + { + switch (mode) + { + case 0: + case 2: + return unpack_bc7_mode0_2(mode, pBlock, pPixels); + case 1: + case 3: + case 7: + return unpack_bc7_mode1_3_7(mode, pBlock, pPixels); + case 4: + case 5: + return unpack_bc7_mode4_5(mode, pBlock, pPixels); + case 6: + return unpack_bc7_mode6(pBlock, pPixels); + default: + break; + } + } + } + + return false; +} + +} // namespace bc7decomp_ref + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright(c) 2020 Richard Geldreich, Jr. +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files(the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and / or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions : +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain(www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non - commercial, and by any means. 
+In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain.We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors.We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ + diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index b2403b84..fd8e43c2 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -1,82 +1,49 @@ // File: bc7enc.c - Richard Geldreich, Jr. 3/31/2020 - MIT license or public domain (see end of file) // Currently supports modes 1, 6 for RGB blocks, and modes 5, 6, 7 for RGBA blocks. -// NOTE: This module is still a work in progress as of 3/31/2020. It needs to support mode modes for RGB content. #include "bc7enc.h" #include #include #include #include -#include - -// Make mapping to simd classes more simple. -// Repeated individual ops instead of functions that can be optimized don't result in a speedup. -// The algorithm is already so fast. 
-#define USE_SIMD_BCENC 0 +//#include // Helpers static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } -#if !USE_SIMD_BCENC static inline float saturate(float value) { return clampf(value, 0, 1.0f); } -#endif //static inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; } +static inline int32_t minimumi(int32_t a, int32_t b) { return (a < b) ? a : b; } static inline uint32_t minimumu(uint32_t a, uint32_t b) { return (a < b) ? a : b; } static inline float minimumf(float a, float b) { return (a < b) ? a : b; } //static inline uint8_t maximumub(uint8_t a, uint8_t b) { return (a > b) ? a : b; } static inline uint32_t maximumu(uint32_t a, uint32_t b) { return (a > b) ? a : b; } +//static inline int32_t maximumi(int32_t a, int32_t b) { return (a > b) ? a : b; } static inline float maximumf(float a, float b) { return (a > b) ? 
a : b; } static inline int squarei(int i) { return i * i; } static inline float squaref(float i) { return i * i; } +template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } static inline int32_t iabs32(int32_t v) { uint32_t msk = v >> 31; return (v ^ msk) - msk; } //static inline void swapub(uint8_t* a, uint8_t* b) { uint8_t t = *a; *a = *b; *b = t; } static inline void swapu(uint32_t* a, uint32_t* b) { uint32_t t = *a; *a = *b; *b = t; } //static inline void swapf(float* a, float* b) { float t = *a; *a = *b; *b = t; } -struct color_quad_u8 { - uint8_t r, g, b, a; - inline const uint8_t& operator[](int index) const { return *(&r + index); } - inline uint8_t& operator[](int index) { return *(&r + index); } -}; - -static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->r = (uint8_t)clampi(r, 0, 255); pRes->g = (uint8_t)clampi(g, 0, 255); pRes->b = (uint8_t)clampi(b, 0, 255); pRes->a = (uint8_t)clampi(a, 0, 255); return pRes; } -static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->r = (uint8_t)r; pRes->g = (uint8_t)g; pRes->b = (uint8_t)b; pRes->a = (uint8_t)a; return pRes; } -static inline bc7enc_bool color_quad_u8_notequals(const color_quad_u8 *pLHS, const color_quad_u8 *pRHS) { return (pLHS->r != pRHS->r) || (pLHS->g != pRHS->g) || (pLHS->b != pRHS->b) || (pLHS->a != pRHS->a); } - -#if USE_SIMD_BCENC -using namespace simd; -using vec4F = float4; - -static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { *pV = vec4F(x); return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = float4m(x,y,z,w); return pV; } -static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { *pV = saturate(*pV); return pV; } -static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res = saturate(*pV); return res; } - -static inline vec4F 
vec4F_from_color(const color_quad_u8 *pC) { vec4F res = float4m((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } -static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS + *pRHS; return res; } -static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS - *pRHS; return res; } -static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return dot(*pLHS, *pRHS); } -static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res = *pLHS * s; return res; } -static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { *pV = normalize(*pV); return pV; } +struct vec4F { float m_c[4]; }; + +static inline color_rgba *color_quad_u8_set_clamped(color_rgba *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->m_c[0] = (uint8_t)clampi(r, 0, 255); pRes->m_c[1] = (uint8_t)clampi(g, 0, 255); pRes->m_c[2] = (uint8_t)clampi(b, 0, 255); pRes->m_c[3] = (uint8_t)clampi(a, 0, 255); return pRes; } +static inline color_rgba *color_quad_u8_set(color_rgba *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->m_c[0] = (uint8_t)r; pRes->m_c[1] = (uint8_t)g; pRes->m_c[2] = (uint8_t)b; pRes->m_c[3] = (uint8_t)a; return pRes; } +static inline bool color_quad_u8_notequals(const color_rgba *pLHS, const color_rgba *pRHS) { return (pLHS->m_c[0] != pRHS->m_c[0]) || (pLHS->m_c[1] != pRHS->m_c[1]) || (pLHS->m_c[2] != pRHS->m_c[2]) || (pLHS->m_c[3] != pRHS->m_c[3]); } +static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->m_c[0] = x; pV->m_c[1] = x; pV->m_c[2] = x; pV->m_c[3] = x; return pV; } +static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->m_c[0] = x; pV->m_c[1] = y; pV->m_c[2] = z; pV->m_c[3] = w; return pV; } +static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { pV->m_c[0] = saturate(pV->m_c[0]); pV->m_c[1] = saturate(pV->m_c[1]); pV->m_c[2] = saturate(pV->m_c[2]); pV->m_c[3] = saturate(pV->m_c[3]); 
return pV; } +static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res; res.m_c[0] = saturate(pV->m_c[0]); res.m_c[1] = saturate(pV->m_c[1]); res.m_c[2] = saturate(pV->m_c[2]); res.m_c[3] = saturate(pV->m_c[3]); return res; } +static inline vec4F vec4F_from_color(const color_rgba *pC) { vec4F res; vec4F_set(&res, pC->m_c[0], pC->m_c[1], pC->m_c[2], pC->m_c[3]); return res; } +static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->m_c[0] + pRHS->m_c[0], pLHS->m_c[1] + pRHS->m_c[1], pLHS->m_c[2] + pRHS->m_c[2], pLHS->m_c[3] + pRHS->m_c[3]); return res; } +static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->m_c[0] - pRHS->m_c[0], pLHS->m_c[1] - pRHS->m_c[1], pLHS->m_c[2] - pRHS->m_c[2], pLHS->m_c[3] - pRHS->m_c[3]); return res; } +static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return pLHS->m_c[0] * pRHS->m_c[0] + pLHS->m_c[1] * pRHS->m_c[1] + pLHS->m_c[2] * pRHS->m_c[2] + pLHS->m_c[3] * pRHS->m_c[3]; } +static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->m_c[0] * s, pLHS->m_c[1] * s, pLHS->m_c[2] * s, pLHS->m_c[3] * s); return res; } +static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { float s = pV->m_c[0] * pV->m_c[0] + pV->m_c[1] * pV->m_c[1] + pV->m_c[2] * pV->m_c[2] + pV->m_c[3] * pV->m_c[3]; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->m_c[0] *= s; pV->m_c[1] *= s; pV->m_c[2] *= s; pV->m_c[3] *= s; } return pV; } -#else -struct vec4F { - float r, g, b, a; - inline const float& operator[](int index) const { return *(&r + index); } - inline float& operator[](int index) { return *(&r + index); } -}; - -static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->r = x; pV->g = x; pV->b = x; pV->a = x; return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->r = x; pV->g = y; pV->b = z; pV->a = w; return pV; } -static inline vec4F 
*vec4F_saturate_in_place(vec4F *pV) { pV->r = saturate(pV->r); pV->g = saturate(pV->g); pV->b = saturate(pV->b); pV->a = saturate(pV->a); return pV; } -static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res; res.r = saturate(pV->r); res.g = saturate(pV->g); res.b = saturate(pV->b); res.a = saturate(pV->a); return res; } -static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res; vec4F_set(&res, pC->r, pC->g, pC->b, pC->a); return res; } -static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->r + pRHS->r, pLHS->g + pRHS->g, pLHS->b + pRHS->b, pLHS->a + pRHS->a); return res; } -static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res; vec4F_set(&res, pLHS->r - pRHS->r, pLHS->g - pRHS->g, pLHS->b - pRHS->b, pLHS->a - pRHS->a); return res; } -static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return pLHS->r * pRHS->r + pLHS->g * pRHS->g + pLHS->b * pRHS->b + pLHS->a * pRHS->a; } -static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res; vec4F_set(&res, pLHS->r * s, pLHS->g * s, pLHS->b * s, pLHS->a * s); return res; } -static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { float s = pV->r * pV->r + pV->g * pV->g + pV->b * pV->b + pV->a * pV->a; if (s != 0.0f) { s = 1.0f / sqrtf(s); pV->r *= s; pV->g *= s; pV->b *= s; pV->a *= s; } return pV; } -#endif // Various BC7 tables static const uint32_t g_bc7_weights2[4] = { 0, 21, 43, 64 }; static const uint32_t g_bc7_weights3[8] = { 0, 9, 18, 27, 37, 46, 55, 64 }; @@ -135,7 +102,7 @@ static const uint8_t g_bc7_mode_has_p_bits[8] = { 1, 1, 0, 1, 0, 0, 1, 1 }; static const uint8_t g_bc7_mode_has_shared_p_bits[8] = { 0, 1, 0, 0, 0, 0, 0, 0 }; static const uint8_t g_bc7_color_precision_table[8] = { 4, 6, 5, 7, 5, 7, 7, 5 }; static const int8_t g_bc7_alpha_precision_table[8] = { 0, 0, 0, 0, 6, 8, 7, 5 }; -static bc7enc_bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || 
(mode == 5); } +static bool get_bc7_mode_has_seperate_alpha_selectors(int mode) { return (mode == 4) || (mode == 5); } typedef struct { uint16_t m_error; uint8_t m_lo; uint8_t m_hi; } endpoint_err; @@ -145,9 +112,105 @@ static const uint32_t BC7ENC_MODE_1_OPTIMAL_INDEX = 2; static endpoint_err g_bc7_mode_7_optimal_endpoints[256][2][2]; // [c][pbit][hp][lp] const uint32_t BC7E_MODE_7_OPTIMAL_INDEX = 1; -// Initialize the lookup table used for optimal single color compression in mode 1. Must be called before encoding. +static float g_mode1_rgba_midpoints[64][2]; +static float g_mode5_rgba_midpoints[128]; +static float g_mode7_rgba_midpoints[32][2]; + +static uint8_t g_mode6_reduced_quant[2048][2]; + +static bool g_initialized; + +// Initialize the lookup table used for optimal single color compression in mode 1/7. Must be called before encoding. void bc7enc_compress_block_init() { + if (g_initialized) + return; + + // Mode 7 endpoint midpoints + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 32; i++) + { + uint32_t vl = ((i << 1) | p) << 2; + vl |= (vl >> 6); + float lo = vl / 255.0f; + + uint32_t vh = ((minimumi(31, (i + 1)) << 1) | p) << 2; + vh |= (vh >> 6); + float hi = vh / 255.0f; + + //g_mode7_quant_values[i][p] = lo; + if (i == 31) + g_mode7_rgba_midpoints[i][p] = 1.0f; + else + g_mode7_rgba_midpoints[i][p] = (lo + hi) / 2.0f; + } + } + + // Mode 1 endpoint midpoints + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 64; i++) + { + uint32_t vl = ((i << 1) | p) << 1; + vl |= (vl >> 7); + float lo = vl / 255.0f; + + uint32_t vh = ((minimumi(63, (i + 1)) << 1) | p) << 1; + vh |= (vh >> 7); + float hi = vh / 255.0f; + + //g_mode1_quant_values[i][p] = lo; + if (i == 63) + g_mode1_rgba_midpoints[i][p] = 1.0f; + else + g_mode1_rgba_midpoints[i][p] = (lo + hi) / 2.0f; + } + } + + // Mode 5 endpoint midpoints + for (uint32_t i = 0; i < 128; i++) + { + uint32_t vl = (i << 1); + vl |= (vl >> 7); + float lo = vl / 255.0f; + + uint32_t vh = 
minimumi(127, i + 1) << 1; + vh |= (vh >> 7); + float hi = vh / 255.0f; + + if (i == 127) + g_mode5_rgba_midpoints[i] = 1.0f; + else + g_mode5_rgba_midpoints[i] = (lo + hi) / 2.0f; + } + + for (uint32_t p = 0; p < 2; p++) + { + for (uint32_t i = 0; i < 2048; i++) + { + float f = i / 2047.0f; + + float best_err = 1e+9f; + int best_index = 0; + for (int j = 0; j < 64; j++) + { + int ik = (j * 127 + 31) / 63; + float k = ((ik << 1) + p) / 255.0f; + + float e = fabsf(k - f); + if (e < best_err) + { + best_err = e; + best_index = ik; + } + } + + g_mode6_reduced_quant[i][p] = (uint8_t)best_index; + } + } // p + + // Mode 1 for (int c = 0; c < 256; c++) { for (uint32_t lp = 0; lp < 2; lp++) @@ -217,9 +280,11 @@ void bc7enc_compress_block_init() } // lp } // c + + g_initialized = true; } -static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_quad_u8 *pColors) +static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_rgba *pColors) { // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
@@ -231,14 +296,14 @@ static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSel for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel][0]; - z10 += pSelector_weights[sel][1]; - z11 += pSelector_weights[sel][2]; - float w = pSelector_weights[sel][3]; - q00_r += w * pColors[i].r; t_r += pColors[i].r; - q00_g += w * pColors[i].g; t_g += pColors[i].g; - q00_b += w * pColors[i].b; t_b += pColors[i].b; - q00_a += w * pColors[i].a; t_a += pColors[i].a; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; } q10_r = t_r - q00_r; @@ -258,32 +323,32 @@ static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t *pSel iz10 = -z10 * det; iz11 = z00 * det; - pXl->r = (float)(iz00 * q00_r + iz01 * q10_r); pXh->r = (float)(iz10 * q00_r + iz11 * q10_r); - pXl->g = (float)(iz00 * q00_g + iz01 * q10_g); pXh->g = (float)(iz10 * q00_g + iz11 * q10_g); - pXl->b = (float)(iz00 * q00_b + iz01 * q10_b); pXh->b = (float)(iz10 * q00_b + iz11 * q10_b); - pXl->a = (float)(iz00 * q00_a + iz01 * q10_a); pXh->a = (float)(iz10 * q00_a + iz11 * q10_a); + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = (float)(iz00 * q00_a + iz01 * q10_a); pXh->m_c[3] = (float)(iz10 * q00_a + iz11 * q10_a); for (uint32_t c = 0; c < 4; c++) { - if (((*pXl)[c] < 0.0f) || ((*pXh)[c] > 255.0f)) + if ((pXl->m_c[c] 
< 0.0f) || (pXh->m_c[c] > 255.0f)) { uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i][c]); - hi_v = maximumu(hi_v, pColors[i][c]); + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); } if (lo_v == hi_v) { - (*pXl)[c] = (float)lo_v; - (*pXh)[c] = (float)hi_v; + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; } } } } -static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_quad_u8 *pColors) +static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSelectors, const vec4F *pSelector_weights, vec4F *pXl, vec4F *pXh, const color_rgba*pColors) { float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; @@ -292,13 +357,13 @@ static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSele for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel].r; - z10 += pSelector_weights[sel].g; - z11 += pSelector_weights[sel].b; - float w = pSelector_weights[sel].a; - q00_r += w * pColors[i].r; t_r += pColors[i].r; - q00_g += w * pColors[i].g; t_g += pColors[i].g; - q00_b += w * pColors[i].b; t_b += pColors[i].b; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; + float w = pSelector_weights[sel].m_c[3]; + q00_r += w * pColors[i].m_c[0]; t_r += pColors[i].m_c[0]; + q00_g += w * pColors[i].m_c[1]; t_g += pColors[i].m_c[1]; + q00_b += w * pColors[i].m_c[2]; t_b += pColors[i].m_c[2]; } q10_r = t_r - q00_r; @@ -317,32 +382,32 @@ static void compute_least_squares_endpoints_rgb(uint32_t N, const uint8_t *pSele iz10 = -z10 * det; iz11 = z00 * det; - pXl->r = (float)(iz00 * q00_r + iz01 * q10_r); pXh->r = (float)(iz10 * q00_r + iz11 * q10_r); - pXl->g = (float)(iz00 * q00_g + iz01 * q10_g); pXh->g = 
(float)(iz10 * q00_g + iz11 * q10_g); - pXl->b = (float)(iz00 * q00_b + iz01 * q10_b); pXh->b = (float)(iz10 * q00_b + iz11 * q10_b); - pXl->a = 255.0f; pXh->a = 255.0f; + pXl->m_c[0] = (float)(iz00 * q00_r + iz01 * q10_r); pXh->m_c[0] = (float)(iz10 * q00_r + iz11 * q10_r); + pXl->m_c[1] = (float)(iz00 * q00_g + iz01 * q10_g); pXh->m_c[1] = (float)(iz10 * q00_g + iz11 * q10_g); + pXl->m_c[2] = (float)(iz00 * q00_b + iz01 * q10_b); pXh->m_c[2] = (float)(iz10 * q00_b + iz11 * q10_b); + pXl->m_c[3] = 255.0f; pXh->m_c[3] = 255.0f; for (uint32_t c = 0; c < 3; c++) { - if (((*pXl)[c] < 0.0f) || ((*pXh)[c] > 255.0f)) + if ((pXl->m_c[c] < 0.0f) || (pXh->m_c[c] > 255.0f)) { uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i][c]); - hi_v = maximumu(hi_v, pColors[i][c]); + lo_v = minimumu(lo_v, pColors[i].m_c[c]); + hi_v = maximumu(hi_v, pColors[i].m_c[c]); } if (lo_v == hi_v) { - (*pXl)[c] = (float)lo_v; - (*pXh)[c] = (float)hi_v; + pXl->m_c[c] = (float)lo_v; + pXh->m_c[c] = (float)hi_v; } } } } -static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, float* pXl, float* pXh, const color_quad_u8* pColors) +static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights, float* pXl, float* pXh, const color_rgba *pColors) { // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. 
@@ -352,13 +417,13 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect { const uint32_t sel = pSelectors[i]; - z00 += pSelector_weights[sel].r; - z10 += pSelector_weights[sel].g; - z11 += pSelector_weights[sel].b; + z00 += pSelector_weights[sel].m_c[0]; + z10 += pSelector_weights[sel].m_c[1]; + z11 += pSelector_weights[sel].m_c[2]; - float w = pSelector_weights[sel].a; + float w = pSelector_weights[sel].m_c[3]; - q00_a += w * pColors[i].a; t_a += pColors[i].a; + q00_a += w * pColors[i].m_c[3]; t_a += pColors[i].m_c[3]; } q10_a = t_a - q00_a; @@ -382,8 +447,8 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect uint32_t lo_v = UINT32_MAX, hi_v = 0; for (uint32_t i = 0; i < N; i++) { - lo_v = minimumu(lo_v, pColors[i].a); - hi_v = maximumu(hi_v, pColors[i].a); + lo_v = minimumu(lo_v, pColors[i].m_c[3]); + hi_v = maximumu(hi_v, pColors[i].m_c[3]); } if (lo_v == hi_v) @@ -394,78 +459,78 @@ static void compute_least_squares_endpoints_a(uint32_t N, const uint8_t* pSelect } } -typedef struct +struct color_cell_compressor_params { uint32_t m_num_pixels; - const color_quad_u8 *m_pPixels; + const color_rgba *m_pPixels; uint32_t m_num_selector_weights; const uint32_t *m_pSelector_weights; const vec4F *m_pSelector_weightsx; uint32_t m_comp_bits; uint32_t m_weights[4]; - bc7enc_bool m_has_alpha; - bc7enc_bool m_has_pbits; - bc7enc_bool m_endpoints_share_pbit; - bc7enc_bool m_perceptual; -} color_cell_compressor_params; + bool m_has_alpha; + bool m_has_pbits; + bool m_endpoints_share_pbit; + bool m_perceptual; +}; -typedef struct +struct color_cell_compressor_results { uint64_t m_best_overall_err; - color_quad_u8 m_low_endpoint; - color_quad_u8 m_high_endpoint; + color_rgba m_low_endpoint; + color_rgba m_high_endpoint; uint32_t m_pbits[2]; uint8_t *m_pSelectors; uint8_t *m_pSelectors_temp; -} color_cell_compressor_results; +}; -static inline color_quad_u8 scale_color(const color_quad_u8 *pC, const 
color_cell_compressor_params *pParams) +static inline color_rgba scale_color(const color_rgba *pC, const color_cell_compressor_params *pParams) { - color_quad_u8 results; + color_rgba results; const uint32_t n = pParams->m_comp_bits + (pParams->m_has_pbits ? 1 : 0); assert((n >= 4) && (n <= 8)); for (uint32_t i = 0; i < 4; i++) { - uint32_t v = (*pC)[i] << (8 - n); + uint32_t v = pC->m_c[i] << (8 - n); v |= (v >> n); assert(v <= 255); - results[i] = (uint8_t)(v); + results.m_c[i] = (uint8_t)(v); } return results; } -static inline uint64_t compute_color_distance_rgb(const color_quad_u8 *pE1, const color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +static inline uint64_t compute_color_distance_rgb(const color_rgba *pE1, const color_rgba *pE2, bool perceptual, const uint32_t weights[4]) { int dr, dg, db; if (perceptual) { - const int l1 = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - const int cr1 = ((int)pE1->r << 9) - l1; - const int cb1 = ((int)pE1->b << 9) - l1; - const int l2 = pE2->r * 109 + pE2->g * 366 + pE2->b * 37; - const int cr2 = ((int)pE2->r << 9) - l2; - const int cb2 = ((int)pE2->b << 9) - l2; + const int l1 = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + const int cr1 = ((int)pE1->m_c[0] << 9) - l1; + const int cb1 = ((int)pE1->m_c[2] << 9) - l1; + const int l2 = pE2->m_c[0] * 109 + pE2->m_c[1] * 366 + pE2->m_c[2] * 37; + const int cr2 = ((int)pE2->m_c[0] << 9) - l2; + const int cb2 = ((int)pE2->m_c[2] << 9) - l2; dr = (l1 - l2) >> 8; dg = (cr1 - cr2) >> 8; db = (cb1 - cb2) >> 8; } else { - dr = (int)pE1->r - (int)pE2->r; - dg = (int)pE1->g - (int)pE2->g; - db = (int)pE1->b - (int)pE2->b; + dr = (int)pE1->m_c[0] - (int)pE2->m_c[0]; + dg = (int)pE1->m_c[1] - (int)pE2->m_c[1]; + db = (int)pE1->m_c[2] - (int)pE2->m_c[2]; } return weights[0] * (uint32_t)(dr * dr) + weights[1] * (uint32_t)(dg * dg) + weights[2] * (uint32_t)(db * db); } -static inline uint64_t compute_color_distance_rgba(const color_quad_u8 *pE1, const 
color_quad_u8 *pE2, bc7enc_bool perceptual, const uint32_t weights[4]) +static inline uint64_t compute_color_distance_rgba(const color_rgba *pE1, const color_rgba *pE2, bool perceptual, const uint32_t weights[4]) { - int da = (int)pE1->a - (int)pE2->a; + int da = (int)pE1->m_c[3] - (int)pE2->m_c[3]; return compute_color_distance_rgb(pE1, pE2, perceptual, weights) + (weights[3] * (uint32_t)(da * da)); } @@ -497,18 +562,18 @@ static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pPar memset(pSelectors, BC7ENC_MODE_1_OPTIMAL_INDEX, pParams->m_num_pixels); - color_quad_u8 p; + color_rgba p; for (uint32_t i = 0; i < 3; i++) { - uint32_t low = ((pResults->m_low_endpoint[i] << 1) | pResults->m_pbits[0]) << 1; + uint32_t low = ((pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; low |= (low >> 7); - uint32_t high = ((pResults->m_high_endpoint[i] << 1) | pResults->m_pbits[0]) << 1; + uint32_t high = ((pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[0]) << 1; high |= (high >> 7); - p[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX]) + high * g_bc7_weights3[BC7ENC_MODE_1_OPTIMAL_INDEX] + 32) >> 6); } - p.a = 255; + p.m_c[3] = 255; uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -520,7 +585,7 @@ static uint64_t pack_mode1_to_one_color(const color_cell_compressor_params *pPar } static uint64_t pack_mode7_to_one_color(const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, uint32_t r, uint32_t g, uint32_t b, uint32_t a, - uint8_t* pSelectors, uint32_t num_pixels, const color_quad_u8* pPixels) + uint8_t* pSelectors, uint32_t num_pixels, const color_rgba *pPixels) { uint32_t best_err = UINT_MAX; uint32_t best_p = 0; @@ -553,19 +618,19 @@ static uint64_t pack_mode7_to_one_color(const 
color_cell_compressor_params* pPar pResults->m_pbits[1] = best_hi_p; for (uint32_t i = 0; i < num_pixels; i++) - pSelectors[i] = BC7E_MODE_7_OPTIMAL_INDEX; + pSelectors[i] = (uint8_t)BC7E_MODE_7_OPTIMAL_INDEX; - color_quad_u8 p; + color_rgba p; for (uint32_t i = 0; i < 4; i++) { - uint32_t low = (pResults->m_low_endpoint[i] << 1) | pResults->m_pbits[0]; - uint32_t high = (pResults->m_high_endpoint[i] << 1) | pResults->m_pbits[1]; + uint32_t low = (pResults->m_low_endpoint.m_c[i] << 1) | pResults->m_pbits[0]; + uint32_t high = (pResults->m_high_endpoint.m_c[i] << 1) | pResults->m_pbits[1]; low = (low << 2) | (low >> 6); high = (high << 2) | (high >> 6); - p[i] = (low * (64 - g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX] + 32) >> 6; + p.m_c[i] = (uint8_t)((low * (64 - g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX]) + high * g_bc7_weights2[BC7E_MODE_7_OPTIMAL_INDEX] + 32) >> 6); } uint64_t total_err = 0; @@ -577,10 +642,11 @@ static uint64_t pack_mode7_to_one_color(const color_cell_compressor_params* pPar return total_err; } -static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +static uint64_t evaluate_solution(const color_rgba *pLow, const color_rgba *pHigh, const uint32_t pbits[2], const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, + const bc7enc_compress_block_params* pComp_params) { - color_quad_u8 quantMinColor = *pLow; - color_quad_u8 quantMaxColor = *pHigh; + color_rgba quantMinColor = *pLow; + color_rgba quantMaxColor = *pHigh; if (pParams->m_has_pbits) { @@ -594,62 +660,79 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 maxPBit = pbits[1]; } - quantMinColor.r = (uint8_t)((pLow->r << 1) | minPBit); - quantMinColor.g = (uint8_t)((pLow->g << 1) | minPBit); - quantMinColor.b = (uint8_t)((pLow->b << 1) | 
minPBit); - quantMinColor.a = (uint8_t)((pLow->a << 1) | minPBit); + quantMinColor.m_c[0] = (uint8_t)((pLow->m_c[0] << 1) | minPBit); + quantMinColor.m_c[1] = (uint8_t)((pLow->m_c[1] << 1) | minPBit); + quantMinColor.m_c[2] = (uint8_t)((pLow->m_c[2] << 1) | minPBit); + quantMinColor.m_c[3] = (uint8_t)((pLow->m_c[3] << 1) | minPBit); - quantMaxColor.r = (uint8_t)((pHigh->r << 1) | maxPBit); - quantMaxColor.g = (uint8_t)((pHigh->g << 1) | maxPBit); - quantMaxColor.b = (uint8_t)((pHigh->b << 1) | maxPBit); - quantMaxColor.a = (uint8_t)((pHigh->a << 1) | maxPBit); + quantMaxColor.m_c[0] = (uint8_t)((pHigh->m_c[0] << 1) | maxPBit); + quantMaxColor.m_c[1] = (uint8_t)((pHigh->m_c[1] << 1) | maxPBit); + quantMaxColor.m_c[2] = (uint8_t)((pHigh->m_c[2] << 1) | maxPBit); + quantMaxColor.m_c[3] = (uint8_t)((pHigh->m_c[3] << 1) | maxPBit); } - color_quad_u8 actualMinColor = scale_color(&quantMinColor, pParams); - color_quad_u8 actualMaxColor = scale_color(&quantMaxColor, pParams); + color_rgba actualMinColor = scale_color(&quantMinColor, pParams); + color_rgba actualMaxColor = scale_color(&quantMaxColor, pParams); const uint32_t N = pParams->m_num_selector_weights; - color_quad_u8 weightedColors[16]; + color_rgba weightedColors[16]; weightedColors[0] = actualMinColor; weightedColors[N - 1] = actualMaxColor; const uint32_t nc = pParams->m_has_alpha ? 
4 : 3; for (uint32_t i = 1; i < (N - 1); i++) for (uint32_t j = 0; j < nc; j++) - weightedColors[i][j] = (uint8_t)((actualMinColor[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor[j] * pParams->m_pSelector_weights[i] + 32) >> 6); - - const int lr = actualMinColor.r; - const int lg = actualMinColor.g; - const int lb = actualMinColor.b; - const int dr = actualMaxColor.r - lr; - const int dg = actualMaxColor.g - lg; - const int db = actualMaxColor.b - lb; + weightedColors[i].m_c[j] = (uint8_t)((actualMinColor.m_c[j] * (64 - pParams->m_pSelector_weights[i]) + actualMaxColor.m_c[j] * pParams->m_pSelector_weights[i] + 32) >> 6); + + const int lr = actualMinColor.m_c[0]; + const int lg = actualMinColor.m_c[1]; + const int lb = actualMinColor.m_c[2]; + const int dr = actualMaxColor.m_c[0] - lr; + const int dg = actualMaxColor.m_c[1] - lg; + const int db = actualMaxColor.m_c[2] - lb; uint64_t total_err = 0; - - if (!pParams->m_perceptual) + + if (pComp_params->m_force_selectors) + { + for (uint32_t i = 0; i < pParams->m_num_pixels; i++) + { + const uint32_t best_sel = pComp_params->m_selectors[i]; + + uint64_t best_err; + if (pParams->m_has_alpha) + best_err = compute_color_distance_rgba(&weightedColors[best_sel], &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + else + best_err = compute_color_distance_rgb(&weightedColors[best_sel], &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); + + total_err += best_err; + + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; + } + } + else if (!pParams->m_perceptual) { if (pParams->m_has_alpha) { - const int la = actualMinColor.a; - const int da = actualMaxColor.a - la; + const int la = actualMinColor.m_c[3]; + const int da = actualMaxColor.m_c[3] - la; const float f = N / (float)(squarei(dr) + squarei(dg) + squarei(db) + squarei(da) + .00000125f); for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pC = &pParams->m_pPixels[i]; - int r = pC->r; - int g = 
pC->g; - int b = pC->b; - int a = pC->a; + const color_rgba *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; + int a = pC->m_c[3]; int best_sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db + (a - la) * da) * f + .5f); best_sel = clampi(best_sel, 1, N - 1); - uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); - uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err0 = compute_color_distance_rgba(&weightedColors[best_sel - 1], pC, false, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgba(&weightedColors[best_sel], pC, false, pParams->m_weights); if (err1 > err0) { @@ -667,16 +750,16 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pC = &pParams->m_pPixels[i]; - int r = pC->r; - int g = pC->g; - int b = pC->b; + const color_rgba *pC = &pParams->m_pPixels[i]; + int r = pC->m_c[0]; + int g = pC->m_c[1]; + int b = pC->m_c[2]; int sel = (int)((float)((r - lr) * dr + (g - lg) * dg + (b - lb) * db) * f + .5f); sel = clampi(sel, 1, N - 1); - uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, BC7ENC_FALSE, pParams->m_weights); - uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, BC7ENC_FALSE, pParams->m_weights); + uint64_t err0 = compute_color_distance_rgb(&weightedColors[sel - 1], pC, false, pParams->m_weights); + uint64_t err1 = compute_color_distance_rgb(&weightedColors[sel], pC, false, pParams->m_weights); int best_sel = sel; uint64_t best_err = err1; @@ -704,7 +787,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 { for (uint32_t j = 0; j < N; j++) { - uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + 
uint64_t err = compute_color_distance_rgba(&weightedColors[j], &pParams->m_pPixels[i], true, pParams->m_weights); if (err < best_err) { best_err = err; @@ -716,7 +799,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 { for (uint32_t j = 0; j < N; j++) { - uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], BC7ENC_TRUE, pParams->m_weights); + uint64_t err = compute_color_distance_rgb(&weightedColors[j], &pParams->m_pPixels[i], true, pParams->m_weights); if (err < best_err) { best_err = err; @@ -747,32 +830,34 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 return total_err; } -static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, color_quad_u8 *pTrialMaxColor, const vec4F *pXl, const vec4F *pXh, uint32_t iscale) +static void fixDegenerateEndpoints(uint32_t mode, color_rgba *pTrialMinColor, color_rgba *pTrialMaxColor, const vec4F *pXl, const vec4F *pXh, uint32_t iscale, + const bc7enc_compress_block_params* pComp_params) { //if ((mode == 1) || (mode == 7)) - if (mode == 1) + //if (mode == 1) + if ( (mode == 1) || ((mode == 6) && (pComp_params->m_quant_mode6_endpoints)) ) { // fix degenerate case where the input collapses to a single colorspace voxel, and we loose all freedom (test with grayscale ramps) for (uint32_t i = 0; i < 3; i++) { - if ((*pTrialMinColor)[i] == (*pTrialMaxColor)[i]) + if (pTrialMinColor->m_c[i] == pTrialMaxColor->m_c[i]) { - if (fabs((*pXl)[i] - (*pXh)[i]) > 0.0f) + if (fabs(pXl->m_c[i] - pXh->m_c[i]) > 0.0f) { - if ((*pTrialMinColor)[i] > (iscale >> 1)) + if (pTrialMinColor->m_c[i] > (iscale >> 1)) { - if ((*pTrialMinColor)[i] > 0) - (*pTrialMinColor)[i]--; + if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; else - if ((*pTrialMaxColor)[i] < iscale) - (*pTrialMaxColor)[i]++; + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; } else { - if ((*pTrialMaxColor)[i] < iscale) - 
(*pTrialMaxColor)[i]++; - else if ((*pTrialMinColor)[i] > 0) - (*pTrialMinColor)[i]--; + if (pTrialMaxColor->m_c[i] < iscale) + pTrialMaxColor->m_c[i]++; + else if (pTrialMinColor->m_c[i] > 0) + pTrialMinColor->m_c[i]--; } } } @@ -780,7 +865,8 @@ static void fixDegenerateEndpoints(uint32_t mode, color_quad_u8 *pTrialMinColor, } } -static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults) +static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const color_cell_compressor_params *pParams, color_cell_compressor_results *pResults, + const bc7enc_compress_block_params* pComp_params) { vec4F_saturate_in_place(&xl); vec4F_saturate_in_place(&xh); @@ -792,114 +878,221 @@ static uint64_t find_optimal_solution(uint32_t mode, vec4F xl, vec4F xh, const c const int32_t totalComps = pParams->m_has_alpha ? 4 : 3; uint32_t best_pbits[2]; - color_quad_u8 bestMinColor, bestMaxColor; + color_rgba bestMinColor, bestMaxColor; if (!pParams->m_endpoints_share_pbit) { - float best_err0 = 1e+9; - float best_err1 = 1e+9; - - for (int p = 0; p < 2; p++) + if ((pParams->m_comp_bits == 7) && (pComp_params->m_quant_mode6_endpoints)) { - color_quad_u8 xMinColor, xMaxColor; + best_pbits[0] = 0; + bestMinColor.m_c[0] = g_mode6_reduced_quant[(int)((xl.m_c[0] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[1] = g_mode6_reduced_quant[(int)((xl.m_c[1] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[2] = g_mode6_reduced_quant[(int)((xl.m_c[2] * 2047.0f) + .5f)][0]; + bestMinColor.m_c[3] = g_mode6_reduced_quant[(int)((xl.m_c[3] * 2047.0f) + .5f)][0]; + + best_pbits[1] = 1; + bestMaxColor.m_c[0] = g_mode6_reduced_quant[(int)((xh.m_c[0] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[1] = g_mode6_reduced_quant[(int)((xh.m_c[1] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[2] = g_mode6_reduced_quant[(int)((xh.m_c[2] * 2047.0f) + .5f)][1]; + bestMaxColor.m_c[3] = g_mode6_reduced_quant[(int)((xh.m_c[3] * 2047.0f) 
+ .5f)][1]; + } + else + { + float best_err0 = 1e+9; + float best_err1 = 1e+9; - // Notes: The pbit controls which quantization intervals are selected. - // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. - // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value - // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) - // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) - for (uint32_t c = 0; c < 4; c++) + for (int p = 0; p < 2; p++) { - xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - } + color_rgba xMinColor, xMaxColor; + + // Notes: The pbit controls which quantization intervals are selected. + // total_levels=2^(comp_bits+1), where comp_bits=4 for mode 0, etc. + // pbit 0: v=(b*2)/(total_levels-1), pbit 1: v=(b*2+1)/(total_levels-1) where b is the component bin from [0,total_levels/2-1] and v is the [0,1] component value + // rearranging you get for pbit 0: b=floor(v*(total_levels-1)/2+.5) + // rearranging you get for pbit 1: b=floor((v*(total_levels-1)-1)/2+.5) + if (pParams->m_comp_bits == 5) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 31.0f); + vl += (xl.m_c[c] > g_mode7_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 63 - 1 + p); - color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); - color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + int vh = (int)(xh.m_c[c] * 31.0f); + vh += (xh.m_c[c] > g_mode7_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 63 - 1 + p); + } + } + else + { + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + 
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + } - float err0 = 0, err1 = 0; - for (int i = 0; i < totalComps; i++) - { - err0 += squaref(scaledLow[i] - xl[i] * 255.0f); - err1 += squaref(scaledHigh[i] - xh[i] * 255.0f); - } + color_rgba scaledLow = scale_color(&xMinColor, pParams); + color_rgba scaledHigh = scale_color(&xMaxColor, pParams); - if (err0 < best_err0) - { - best_err0 = err0; - best_pbits[0] = p; + float err0 = 0, err1 = 0; + for (int i = 0; i < totalComps; i++) + { + err0 += squaref(scaledLow.m_c[i] - xl.m_c[i] * 255.0f); + err1 += squaref(scaledHigh.m_c[i] - xh.m_c[i] * 255.0f); + } - bestMinColor.r = xMinColor.r >> 1; - bestMinColor.g = xMinColor.g >> 1; - bestMinColor.b = xMinColor.b >> 1; - bestMinColor.a = xMinColor.a >> 1; - } + if (p == 1) + { + err0 *= pComp_params->m_pbit1_weight; + err1 *= pComp_params->m_pbit1_weight; + } + + if (err0 < best_err0) + { + best_err0 = err0; + best_pbits[0] = p; - if (err1 < best_err1) - { - best_err1 = err1; - best_pbits[1] = p; + bestMinColor.m_c[0] = xMinColor.m_c[0] >> 1; + bestMinColor.m_c[1] = xMinColor.m_c[1] >> 1; + bestMinColor.m_c[2] = xMinColor.m_c[2] >> 1; + bestMinColor.m_c[3] = xMinColor.m_c[3] >> 1; + } + + if (err1 < best_err1) + { + best_err1 = err1; + best_pbits[1] = p; - bestMaxColor.r = xMaxColor.r >> 1; - bestMaxColor.g = xMaxColor.g >> 1; - bestMaxColor.b = xMaxColor.b >> 1; - bestMaxColor.a = xMaxColor.a >> 1; + bestMaxColor.m_c[0] = xMaxColor.m_c[0] >> 1; + bestMaxColor.m_c[1] = xMaxColor.m_c[1] >> 1; + bestMaxColor.m_c[2] = xMaxColor.m_c[2] >> 1; + bestMaxColor.m_c[3] = xMaxColor.m_c[3] >> 1; + } } } } else { - // Endpoints share pbits - float best_err = 1e+9; - - for (int p = 0; p < 2; p++) + if ((mode == 1) && (pComp_params->m_bias_mode1_pbits)) { - color_quad_u8 xMinColor, xMaxColor; + float x = 0.0f; + for (uint32_t c = 0; c < 3; c++) + x = std::max(std::max(x, xl.m_c[c]), xh.m_c[c]); + + int p = 0; + if (x > 
(253.0f / 255.0f)) + p = 1; + + color_rgba xMinColor, xMaxColor; for (uint32_t c = 0; c < 4; c++) { - xMinColor[c] = (uint8_t)(clampi(((int)((xl[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - xMaxColor[c] = (uint8_t)(clampi(((int)((xh[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); - } + int vl = (int)(xl.m_c[c] * 63.0f); + vl += (xl.m_c[c] > g_mode1_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 127 - 1 + p); - color_quad_u8 scaledLow = scale_color(&xMinColor, pParams); - color_quad_u8 scaledHigh = scale_color(&xMaxColor, pParams); + int vh = (int)(xh.m_c[c] * 63.0f); + vh += (xh.m_c[c] > g_mode1_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 127 - 1 + p); + } - float err = 0; - for (int i = 0; i < totalComps; i++) - err += squaref((scaledLow[i] / 255.0f) - xl[i]) + squaref((scaledHigh[i] / 255.0f) - xh[i]); + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } + } + else + { + // Endpoints share pbits + float best_err = 1e+9; - if (err < best_err) + for (int p = 0; p < 2; p++) { - best_err = err; - best_pbits[0] = p; - best_pbits[1] = p; - for (uint32_t j = 0; j < 4; j++) + color_rgba xMinColor, xMaxColor; + if (pParams->m_comp_bits == 6) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 63.0f); + vl += (xl.m_c[c] > g_mode1_rgba_midpoints[vl][p]); + xMinColor.m_c[c] = (uint8_t)clampi(vl * 2 + p, p, 127 - 1 + p); + + int vh = (int)(xh.m_c[c] * 63.0f); + vh += (xh.m_c[c] > g_mode1_rgba_midpoints[vh][p]); + xMaxColor.m_c[c] = (uint8_t)clampi(vh * 2 + p, p, 127 - 1 + p); + } + } + else { - bestMinColor[j] = xMinColor[j] >> 1; - bestMaxColor[j] = xMaxColor[j] >> 1; + for (uint32_t c = 0; c < 4; c++) + { + xMinColor.m_c[c] = (uint8_t)(clampi(((int)((xl.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + 
xMaxColor.m_c[c] = (uint8_t)(clampi(((int)((xh.m_c[c] * scalep - p) / 2.0f + .5f)) * 2 + p, p, iscalep - 1 + p)); + } + } + + color_rgba scaledLow = scale_color(&xMinColor, pParams); + color_rgba scaledHigh = scale_color(&xMaxColor, pParams); + + float err = 0; + for (int i = 0; i < totalComps; i++) + err += squaref((scaledLow.m_c[i] / 255.0f) - xl.m_c[i]) + squaref((scaledHigh.m_c[i] / 255.0f) - xh.m_c[i]); + + if (p == 1) + err *= pComp_params->m_pbit1_weight; + + if (err < best_err) + { + best_err = err; + best_pbits[0] = p; + best_pbits[1] = p; + for (uint32_t j = 0; j < 4; j++) + { + bestMinColor.m_c[j] = xMinColor.m_c[j] >> 1; + bestMaxColor.m_c[j] = xMaxColor.m_c[j] >> 1; + } } } } } - fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1); + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, pComp_params); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) - evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults); + evaluate_solution(&bestMinColor, &bestMaxColor, best_pbits, pParams, pResults, pComp_params); } else { const int iscale = (1 << pParams->m_comp_bits) - 1; const float scale = (float)iscale; - color_quad_u8 trialMinColor, trialMaxColor; - color_quad_u8_set_clamped(&trialMinColor, (int)(xl.r * scale + .5f), (int)(xl.g * scale + .5f), (int)(xl.b * scale + .5f), (int)(xl.a * scale + .5f)); - color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.r * scale + .5f), (int)(xh.g * scale + .5f), (int)(xh.b * scale + .5f), (int)(xh.a * scale + .5f)); + color_rgba trialMinColor, trialMaxColor; + if (pParams->m_comp_bits == 7) + { + for (uint32_t c = 0; c < 4; c++) + { + int vl = (int)(xl.m_c[c] * 127.0f); + vl += (xl.m_c[c] > g_mode5_rgba_midpoints[vl]); + 
trialMinColor.m_c[c] = (uint8_t)clampi(vl, 0, 127); + + int vh = (int)(xh.m_c[c] * 127.0f); + vh += (xh.m_c[c] > g_mode5_rgba_midpoints[vh]); + trialMaxColor.m_c[c] = (uint8_t)clampi(vh, 0, 127); + } + } + else + { + color_quad_u8_set_clamped(&trialMinColor, (int)(xl.m_c[0] * scale + .5f), (int)(xl.m_c[1] * scale + .5f), (int)(xl.m_c[2] * scale + .5f), (int)(xl.m_c[3] * scale + .5f)); + color_quad_u8_set_clamped(&trialMaxColor, (int)(xh.m_c[0] * scale + .5f), (int)(xh.m_c[1] * scale + .5f), (int)(xh.m_c[2] * scale + .5f), (int)(xh.m_c[3] * scale + .5f)); + } - fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale); + fixDegenerateEndpoints(mode, &trialMinColor, &trialMaxColor, &xl, &xh, iscale, pComp_params); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&trialMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&trialMaxColor, &pResults->m_high_endpoint)) - evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults); + evaluate_solution(&trialMinColor, &trialMaxColor, pResults->m_pbits, pParams, pResults, pComp_params); } return pResults->m_best_overall_err; @@ -914,14 +1107,14 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso // If the partition's colors are all the same in mode 1, then just pack them as a single color. 
if (mode == 1) { - const uint32_t cr = pParams->m_pPixels[0].r, cg = pParams->m_pPixels[0].g, cb = pParams->m_pPixels[0].b; + const uint32_t cr = pParams->m_pPixels[0].m_c[0], cg = pParams->m_pPixels[0].m_c[1], cb = pParams->m_pPixels[0].m_c[2]; - bc7enc_bool allSame = BC7ENC_TRUE; + bool allSame = true; for (uint32_t i = 1; i < pParams->m_num_pixels; i++) { - if ((cr != pParams->m_pPixels[i].r) || (cg != pParams->m_pPixels[i].g) || (cb != pParams->m_pPixels[i].b)) + if ((cr != pParams->m_pPixels[i].m_c[0]) || (cg != pParams->m_pPixels[i].m_c[1]) || (cb != pParams->m_pPixels[i].m_c[2])) { - allSame = BC7ENC_FALSE; + allSame = false; break; } } @@ -931,14 +1124,14 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso } else if (mode == 7) { - const uint32_t cr = pParams->m_pPixels[0].r, cg = pParams->m_pPixels[0].g, cb = pParams->m_pPixels[0].b, ca = pParams->m_pPixels[0].a; + const uint32_t cr = pParams->m_pPixels[0].m_c[0], cg = pParams->m_pPixels[0].m_c[1], cb = pParams->m_pPixels[0].m_c[2], ca = pParams->m_pPixels[0].m_c[3]; - bc7enc_bool allSame = BC7ENC_TRUE; + bool allSame = true; for (uint32_t i = 1; i < pParams->m_num_pixels; i++) { - if ((cr != pParams->m_pPixels[i].r) || (cg != pParams->m_pPixels[i].g) || (cb != pParams->m_pPixels[i].b) || (ca != pParams->m_pPixels[i].a)) + if ((cr != pParams->m_pPixels[i].m_c[0]) || (cg != pParams->m_pPixels[i].m_c[1]) || (cb != pParams->m_pPixels[i].m_c[2]) || (ca != pParams->m_pPixels[i].m_c[3])) { - allSame = BC7ENC_FALSE; + allSame = false; break; } } @@ -970,16 +1163,16 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); color = vec4F_sub(&color, &meanColorScaled); - vec4F a = vec4F_mul(&color, color.r); - vec4F b = vec4F_mul(&color, color.g); - vec4F c = vec4F_mul(&color, color.b); - vec4F d = vec4F_mul(&color, color.a); + vec4F a = vec4F_mul(&color, color.m_c[0]); + vec4F b = vec4F_mul(&color, 
color.m_c[1]); + vec4F c = vec4F_mul(&color, color.m_c[2]); + vec4F d = vec4F_mul(&color, color.m_c[3]); vec4F n = i ? axis : color; vec4F_normalize_in_place(&n); - axis.r += vec4F_dot(&a, &n); - axis.g += vec4F_dot(&b, &n); - axis.b += vec4F_dot(&c, &n); - axis.a += vec4F_dot(&d, &n); + axis.m_c[0] += vec4F_dot(&a, &n); + axis.m_c[1] += vec4F_dot(&b, &n); + axis.m_c[2] += vec4F_dot(&c, &n); + axis.m_c[3] += vec4F_dot(&d, &n); } vec4F_normalize_in_place(&axis); } @@ -990,10 +1183,10 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { - const color_quad_u8 *pV = &pParams->m_pPixels[i]; - float r = pV->r - meanColorScaled.r; - float g = pV->g - meanColorScaled.g; - float b = pV->b - meanColorScaled.b; + const color_rgba *pV = &pParams->m_pPixels[i]; + float r = pV->m_c[0] - meanColorScaled.m_c[0]; + float g = pV->m_c[1] - meanColorScaled.m_c[1]; + float b = pV->m_c[2] - meanColorScaled.m_c[2]; cov[0] += r*r; cov[1] += r*g; cov[2] += r*b; cov[3] += g*g; cov[4] += g*b; cov[5] += b*b; } @@ -1070,20 +1263,20 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso minColor = maxColor; maxColor = temp; #else - float a = minColor.r, b = minColor.g, c = minColor.b, d = minColor.a; - minColor.r = maxColor.r; - minColor.g = maxColor.g; - minColor.b = maxColor.b; - minColor.a = maxColor.a; - maxColor.r = a; - maxColor.g = b; - maxColor.b = c; - maxColor.a = d; + float a = minColor.m_c[0], b = minColor.m_c[1], c = minColor.m_c[2], d = minColor.m_c[3]; + minColor.m_c[0] = maxColor.m_c[0]; + minColor.m_c[1] = maxColor.m_c[1]; + minColor.m_c[2] = maxColor.m_c[2]; + minColor.m_c[3] = maxColor.m_c[3]; + maxColor.m_c[0] = a; + maxColor.m_c[1] = b; + maxColor.m_c[2] = c; + maxColor.m_c[3] = d; #endif } // First find a solution using the block's PCA. 
- if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) + if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults, pComp_params)) return 0; if (pComp_params->m_try_least_squares) @@ -1100,7 +1293,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; } @@ -1141,7 +1334,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -1160,7 +1353,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) @@ -1181,7 +1374,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; // In uber levels 2+, try taking more advantage of endpoint extrapolation by scaling the selectors in one direction or another. 
@@ -1210,7 +1403,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) + if (!find_optimal_solution(mode, xl, xh, pParams, pResults, pComp_params)) return 0; } } @@ -1221,7 +1414,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { // Try encoding the partition as a single color by using the optimal singe colors tables to encode the block to its mean. color_cell_compressor_results avg_results = *pResults; - const uint32_t r = (int)(.5f + meanColor.r * 255.0f), g = (int)(.5f + meanColor.g * 255.0f), b = (int)(.5f + meanColor.b * 255.0f); + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f); uint64_t avg_err = pack_mode1_to_one_color(pParams, &avg_results, r, g, b, pResults->m_pSelectors_temp); if (avg_err < pResults->m_best_overall_err) { @@ -1234,7 +1427,7 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso { // Try encoding the partition as a single color by using the optimal singe colors tables to encode the block to its mean. 
color_cell_compressor_results avg_results = *pResults; - const uint32_t r = (int)(.5f + meanColor.r * 255.0f), g = (int)(.5f + meanColor.g * 255.0f), b = (int)(.5f + meanColor.b * 255.0f), a = (int)(.5f + meanColor.a * 255.0f); + const uint32_t r = (int)(.5f + meanColor.m_c[0] * 255.0f), g = (int)(.5f + meanColor.m_c[1] * 255.0f), b = (int)(.5f + meanColor.m_c[2] * 255.0f), a = (int)(.5f + meanColor.m_c[3] * 255.0f); uint64_t avg_err = pack_mode7_to_one_color(pParams, &avg_results, r, g, b, a, pResults->m_pSelectors_temp, pParams->m_num_pixels, pParams->m_pPixels); if (avg_err < pResults->m_best_overall_err) { @@ -1247,46 +1440,46 @@ static uint64_t color_cell_compression(uint32_t mode, const color_cell_compresso return pResults->m_best_overall_err; } -static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const color_quad_u8 *pPixels, bc7enc_bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) +static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const color_rgba *pPixels, bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) { // Find RGB bounds as an approximation of the block's principle axis uint32_t lr = 255, lg = 255, lb = 255; uint32_t hr = 0, hg = 0, hb = 0; for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; - if (pC->r < lr) lr = pC->r; - if (pC->g < lg) lg = pC->g; - if (pC->b < lb) lb = pC->b; - if (pC->r > hr) hr = pC->r; - if (pC->g > hg) hg = pC->g; - if (pC->b > hb) hb = pC->b; + const color_rgba *pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; } - color_quad_u8 lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, 0); - color_quad_u8 highColor; color_quad_u8_set(&highColor, hr, hg, hb, 0); + color_rgba lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, 0); + 
color_rgba highColor; color_quad_u8_set(&highColor, hr, hg, hb, 0); // Place endpoints at bbox diagonals and compute interpolated colors const uint32_t N = 8; - color_quad_u8 weightedColors[8]; + color_rgba weightedColors[8]; weightedColors[0] = lowColor; weightedColors[N - 1] = highColor; for (uint32_t i = 1; i < (N - 1); i++) { - weightedColors[i].r = (uint8_t)((lowColor.r * (64 - g_bc7_weights3[i]) + highColor.r * g_bc7_weights3[i] + 32) >> 6); - weightedColors[i].g = (uint8_t)((lowColor.g * (64 - g_bc7_weights3[i]) + highColor.g * g_bc7_weights3[i] + 32) >> 6); - weightedColors[i].b = (uint8_t)((lowColor.b * (64 - g_bc7_weights3[i]) + highColor.b * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[0] = (uint8_t)((lowColor.m_c[0] * (64 - g_bc7_weights3[i]) + highColor.m_c[0] * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[1] = (uint8_t)((lowColor.m_c[1] * (64 - g_bc7_weights3[i]) + highColor.m_c[1] * g_bc7_weights3[i] + 32) >> 6); + weightedColors[i].m_c[2] = (uint8_t)((lowColor.m_c[2] * (64 - g_bc7_weights3[i]) + highColor.m_c[2] * g_bc7_weights3[i] + 32) >> 6); } // Compute dots and thresholds - const int ar = highColor.r - lowColor.r; - const int ag = highColor.g - lowColor.g; - const int ab = highColor.b - lowColor.b; + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; int dots[8]; for (uint32_t i = 0; i < N; i++) - dots[i] = weightedColors[i].r * ar + weightedColors[i].g * ag + weightedColors[i].b * ab; + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab; int thresh[8 - 1]; for (uint32_t i = 0; i < (N - 1); i++) @@ -1299,17 +1492,17 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo int l1[8], cr1[8], cb1[8]; for (int j = 0; j < 8; j++) { - const color_quad_u8 *pE1 = &weightedColors[j]; - l1[j] = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - cr1[j] = 
((int)pE1->r << 9) - l1[j]; - cb1[j] = ((int)pE1->b << 9) - l1[j]; + const color_rgba *pE1 = &weightedColors[j]; + l1[j] = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + cr1[j] = ((int)pE1->m_c[0] << 9) - l1[j]; + cb1[j] = ((int)pE1->m_c[2] << 9) - l1[j]; } for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; + const color_rgba *pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; // Find approximate selector uint32_t s = 0; @@ -1329,9 +1522,9 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo s = 1; // Compute error - const int l2 = pC->r * 109 + pC->g * 366 + pC->b * 37; - const int cr2 = ((int)pC->r << 9) - l2; - const int cb2 = ((int)pC->b << 9) - l2; + const int l2 = pC->m_c[0] * 109 + pC->m_c[1] * 366 + pC->m_c[2] * 37; + const int cr2 = ((int)pC->m_c[0] << 9) - l2; + const int cb2 = ((int)pC->m_c[2] << 9) - l2; const int dl = (l1[s] - l2) >> 8; const int dcr = (cr1[s] - cr2) >> 8; @@ -1348,9 +1541,9 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo { for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8 *pC = &pPixels[i]; + const color_rgba *pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2]; // Find approximate selector uint32_t s = 0; @@ -1370,11 +1563,11 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo s = 1; // Compute error - const color_quad_u8 *pE1 = &weightedColors[s]; + const color_rgba *pE1 = &weightedColors[s]; - int dr = (int)pE1->r - (int)pC->r; - int dg = (int)pE1->g - (int)pC->g; - int db = (int)pE1->b - (int)pC->b; + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; total_err += pweights[0] * (dr * dr) + pweights[1] * (dg * dg) + pweights[2] * (db * 
db); if (total_err > best_err_so_far) @@ -1385,51 +1578,51 @@ static uint64_t color_cell_compression_est_mode1(uint32_t num_pixels, const colo return total_err; } -static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const color_quad_u8* pPixels, bc7enc_bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) +static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const color_rgba * pPixels, bool perceptual, uint32_t pweights[4], uint64_t best_err_so_far) { // Find RGB bounds as an approximation of the block's principle axis uint32_t lr = 255, lg = 255, lb = 255, la = 255; uint32_t hr = 0, hg = 0, hb = 0, ha = 0; for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; - if (pC->r < lr) lr = pC->r; - if (pC->g < lg) lg = pC->g; - if (pC->b < lb) lb = pC->b; - if (pC->a < la) la = pC->a; - - if (pC->r > hr) hr = pC->r; - if (pC->g > hg) hg = pC->g; - if (pC->b > hb) hb = pC->b; - if (pC->a > ha) ha = pC->a; + const color_rgba* pC = &pPixels[i]; + if (pC->m_c[0] < lr) lr = pC->m_c[0]; + if (pC->m_c[1] < lg) lg = pC->m_c[1]; + if (pC->m_c[2] < lb) lb = pC->m_c[2]; + if (pC->m_c[3] < la) la = pC->m_c[3]; + + if (pC->m_c[0] > hr) hr = pC->m_c[0]; + if (pC->m_c[1] > hg) hg = pC->m_c[1]; + if (pC->m_c[2] > hb) hb = pC->m_c[2]; + if (pC->m_c[3] > ha) ha = pC->m_c[3]; } - color_quad_u8 lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, la); - color_quad_u8 highColor; color_quad_u8_set(&highColor, hr, hg, hb, ha); + color_rgba lowColor; color_quad_u8_set(&lowColor, lr, lg, lb, la); + color_rgba highColor; color_quad_u8_set(&highColor, hr, hg, hb, ha); // Place endpoints at bbox diagonals and compute interpolated colors const uint32_t N = 4; - color_quad_u8 weightedColors[4]; + color_rgba weightedColors[4]; weightedColors[0] = lowColor; weightedColors[N - 1] = highColor; for (uint32_t i = 1; i < (N - 1); i++) { - weightedColors[i].r = (uint8_t)((lowColor.r * (64 - g_bc7_weights2[i]) + highColor.r * 
g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].g = (uint8_t)((lowColor.g * (64 - g_bc7_weights2[i]) + highColor.g * g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].b = (uint8_t)((lowColor.b * (64 - g_bc7_weights2[i]) + highColor.b * g_bc7_weights2[i] + 32) >> 6); - weightedColors[i].a = (uint8_t)((lowColor.a * (64 - g_bc7_weights2[i]) + highColor.a * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[0] = (uint8_t)((lowColor.m_c[0] * (64 - g_bc7_weights2[i]) + highColor.m_c[0] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[1] = (uint8_t)((lowColor.m_c[1] * (64 - g_bc7_weights2[i]) + highColor.m_c[1] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[2] = (uint8_t)((lowColor.m_c[2] * (64 - g_bc7_weights2[i]) + highColor.m_c[2] * g_bc7_weights2[i] + 32) >> 6); + weightedColors[i].m_c[3] = (uint8_t)((lowColor.m_c[3] * (64 - g_bc7_weights2[i]) + highColor.m_c[3] * g_bc7_weights2[i] + 32) >> 6); } // Compute dots and thresholds - const int ar = highColor.r - lowColor.r; - const int ag = highColor.g - lowColor.g; - const int ab = highColor.b - lowColor.b; - const int aa = highColor.a - lowColor.a; + const int ar = highColor.m_c[0] - lowColor.m_c[0]; + const int ag = highColor.m_c[1] - lowColor.m_c[1]; + const int ab = highColor.m_c[2] - lowColor.m_c[2]; + const int aa = highColor.m_c[3] - lowColor.m_c[3]; int dots[4]; for (uint32_t i = 0; i < N; i++) - dots[i] = weightedColors[i].r * ar + weightedColors[i].g * ag + weightedColors[i].b * ab + weightedColors[i].a * aa; + dots[i] = weightedColors[i].m_c[0] * ar + weightedColors[i].m_c[1] * ag + weightedColors[i].m_c[2] * ab + weightedColors[i].m_c[3] * aa; int thresh[4 - 1]; for (uint32_t i = 0; i < (N - 1); i++) @@ -1442,17 +1635,17 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo int l1[4], cr1[4], cb1[4]; for (int j = 0; j < 4; j++) { - const color_quad_u8* pE1 = &weightedColors[j]; - l1[j] = pE1->r * 109 + pE1->g * 366 + pE1->b * 37; - cr1[j] = ((int)pE1->r 
<< 9) - l1[j]; - cb1[j] = ((int)pE1->b << 9) - l1[j]; + const color_rgba* pE1 = &weightedColors[j]; + l1[j] = pE1->m_c[0] * 109 + pE1->m_c[1] * 366 + pE1->m_c[2] * 37; + cr1[j] = ((int)pE1->m_c[0] << 9) - l1[j]; + cb1[j] = ((int)pE1->m_c[2] << 9) - l1[j]; } for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; + const color_rgba* pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b + aa * pC->a; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; // Find approximate selector uint32_t s = 0; @@ -1464,15 +1657,15 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo s = 1; // Compute error - const int l2 = pC->r * 109 + pC->g * 366 + pC->b * 37; - const int cr2 = ((int)pC->r << 9) - l2; - const int cb2 = ((int)pC->b << 9) - l2; + const int l2 = pC->m_c[0] * 109 + pC->m_c[1] * 366 + pC->m_c[2] * 37; + const int cr2 = ((int)pC->m_c[0] << 9) - l2; + const int cb2 = ((int)pC->m_c[2] << 9) - l2; const int dl = (l1[s] - l2) >> 8; const int dcr = (cr1[s] - cr2) >> 8; const int dcb = (cb1[s] - cb2) >> 8; - const int dca = (int)pC->a - (int)weightedColors[s].a; + const int dca = (int)pC->m_c[3] - (int)weightedColors[s].m_c[3]; int ie = (pweights[0] * dl * dl) + (pweights[1] * dcr * dcr) + (pweights[2] * dcb * dcb) + (pweights[3] * dca * dca); @@ -1485,9 +1678,9 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo { for (uint32_t i = 0; i < num_pixels; i++) { - const color_quad_u8* pC = &pPixels[i]; + const color_rgba* pC = &pPixels[i]; - int d = ar * pC->r + ag * pC->g + ab * pC->b + aa * pC->a; + int d = ar * pC->m_c[0] + ag * pC->m_c[1] + ab * pC->m_c[2] + aa * pC->m_c[3]; // Find approximate selector uint32_t s = 0; @@ -1499,12 +1692,12 @@ static uint64_t color_cell_compression_est_mode7(uint32_t num_pixels, const colo s = 1; // Compute error - const color_quad_u8* pE1 = &weightedColors[s]; + const color_rgba* pE1 = &weightedColors[s]; - int 
dr = (int)pE1->r - (int)pC->r; - int dg = (int)pE1->g - (int)pC->g; - int db = (int)pE1->b - (int)pC->b; - int da = (int)pE1->a - (int)pC->a; + int dr = (int)pE1->m_c[0] - (int)pC->m_c[0]; + int dg = (int)pE1->m_c[1] - (int)pC->m_c[1]; + int db = (int)pE1->m_c[2] - (int)pC->m_c[2]; + int da = (int)pE1->m_c[3] - (int)pC->m_c[3]; total_err += pweights[0] * (dr * dr) + pweights[1] * (dg * dg) + pweights[2] * (db * db) + pweights[3] * (da * da); if (total_err > best_err_so_far) @@ -1558,9 +1751,9 @@ static const uint32_t g_partition_predictors[35] = }; // Estimate the partition used by modes 1/7. This scans through each partition and computes an approximate error for each. -static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, uint32_t pweights[4], uint32_t mode) +static uint32_t estimate_partition(const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, uint32_t pweights[4], uint32_t mode) { - const uint32_t total_partitions = minimumu(pComp_params->m_max_partitions_mode, BC7ENC_MAX_PARTITIONS1); + const uint32_t total_partitions = minimumu(pComp_params->m_max_partitions, BC7ENC_MAX_PARTITIONS); if (total_partitions <= 1) return 0; @@ -1590,7 +1783,7 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co const uint32_t partition = s_sorted_partition_order[partition_iter]; // Check to see if we should bother evaluating this partition at all, depending on the best partition found from the first 14. 
- if (pComp_params->m_mode_partition_estimation_filterbank) + if (pComp_params->m_mode17_partition_estimation_filterbank) { if ((partition_iter >= 14) && (partition_iter <= 34)) { @@ -1607,7 +1800,7 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co const uint8_t *pPartition = &g_bc7_partition2[partition * 16]; - color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors[2] = { 0, 0 }; for (uint32_t index = 0; index < 16; index++) subset_colors[pPartition[index]][subset_total_colors[pPartition[index]]++] = pPixels[index]; @@ -1621,6 +1814,11 @@ static uint32_t estimate_partition(const color_quad_u8 *pPixels, const bc7enc_co total_subset_err += color_cell_compression_est_mode1(subset_total_colors[subset], &subset_colors[subset][0], pComp_params->m_perceptual, pweights, best_err); } + if (partition < 16) + { + total_subset_err = (uint64_t)((double)total_subset_err * pComp_params->m_low_frequency_partition_weight + .5f); + } + if (total_subset_err < best_err) { best_err = total_subset_err; @@ -1653,20 +1851,20 @@ static void set_block_bits(uint8_t *pBytes, uint32_t val, uint32_t num_bits, uin assert(*pCur_ofs <= 128); } -typedef struct +struct bc7_optimization_results { uint32_t m_mode; uint32_t m_partition; uint8_t m_selectors[16]; uint8_t m_alpha_selectors[16]; - color_quad_u8 m_low[3]; - color_quad_u8 m_high[3]; + color_rgba m_low[3]; + color_rgba m_high[3]; uint32_t m_pbits[3][2]; uint32_t m_rotation; uint32_t m_index_selector; -} bc7_optimization_results; +}; -static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) +void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResults) { assert(pResults->m_index_selector <= 1); assert(pResults->m_rotation <= 3); @@ -1692,7 +1890,7 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu uint8_t alpha_selectors[16]; memcpy(alpha_selectors, pResults->m_alpha_selectors, 16); - 
color_quad_u8 low[3], high[3]; + color_rgba low[3], high[3]; memcpy(low, pResults->m_low, sizeof(low)); memcpy(high, pResults->m_high, sizeof(high)); @@ -1729,14 +1927,14 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu { for (uint32_t q = 0; q < 3; q++) { - uint8_t t = low[k][q]; - low[k][q] = high[k][q]; - high[k][q] = t; + uint8_t t = low[k].m_c[q]; + low[k].m_c[q] = high[k].m_c[q]; + high[k].m_c[q] = t; } } else { - color_quad_u8 tmp = low[k]; + color_rgba tmp = low[k]; low[k] = high[k]; high[k] = tmp; } @@ -1760,9 +1958,9 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu if (pPartition[i] == k) alpha_selectors[i] = (uint8_t)((num_alpha_indices - 1) - alpha_selectors[i]); - uint8_t t = low[k].a; - low[k].a = high[k].a; - high[k].a = t; + uint8_t t = low[k].m_c[3]; + low[k].m_c[3] = high[k].m_c[3]; + high[k].m_c[3] = t; } } } @@ -1787,8 +1985,8 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu { for (uint32_t subset = 0; subset < total_subsets; subset++) { - set_block_bits(pBlock_bytes, low[subset][comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); - set_block_bits(pBlock_bytes, high[subset][comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + set_block_bits(pBlock_bytes, low[subset].m_c[comp], (comp == 3) ? g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); + set_block_bits(pBlock_bytes, high[subset].m_c[comp], (comp == 3) ? 
g_bc7_alpha_precision_table[best_mode] : g_bc7_color_precision_table[best_mode], &cur_bit_ofs); } } @@ -1838,16 +2036,16 @@ static void encode_bc7_block(void* pBlock, const bc7_optimization_results* pResu assert(cur_bit_ofs == 128); } -static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_compress_block_params* pComp_params, color_cell_compressor_params* pParams, uint32_t lo_a, uint32_t hi_a, bc7_optimization_results* pOpt_results5, uint64_t* pMode5_err, uint64_t* pMode5_alpha_err) +static void handle_alpha_block_mode5(const color_rgba* pPixels, const bc7enc_compress_block_params* pComp_params, color_cell_compressor_params* pParams, uint32_t lo_a, uint32_t hi_a, bc7_optimization_results* pOpt_results5, uint64_t* pMode5_err, uint64_t* pMode5_alpha_err) { pParams->m_pSelector_weights = g_bc7_weights2; pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights2x; pParams->m_num_selector_weights = 4; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_FALSE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_FALSE; + pParams->m_has_pbits = false; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = false; pParams->m_perceptual = pComp_params->m_perceptual; @@ -1869,8 +2067,8 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ if (lo_a == hi_a) { *pMode5_alpha_err = 0; - pOpt_results5->m_low[0].a = (uint8_t)lo_a; - pOpt_results5->m_high[0].a = (uint8_t)hi_a; + pOpt_results5->m_low[0].m_c[3] = (uint8_t)lo_a; + pOpt_results5->m_high[0].m_c[3] = (uint8_t)hi_a; memset(pOpt_results5->m_alpha_selectors, 0, sizeof(pOpt_results5->m_alpha_selectors)); } else @@ -1893,7 +2091,7 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ uint64_t trial_alpha_err = 0; for (uint32_t i = 0; i < 16; i++) { - const int32_t a = pParams->m_pPixels[i].a; + const int32_t a = pParams->m_pPixels[i].m_c[3]; int s = 0; int32_t be = iabs32(a - vals[0]); @@ 
-1911,8 +2109,8 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ if (trial_alpha_err < *pMode5_alpha_err) { *pMode5_alpha_err = trial_alpha_err; - pOpt_results5->m_low[0].a = (uint8_t)lo_a; - pOpt_results5->m_high[0].a = (uint8_t)hi_a; + pOpt_results5->m_low[0].m_c[3] = (uint8_t)lo_a; + pOpt_results5->m_high[0].m_c[3] = (uint8_t)hi_a; memcpy(pOpt_results5->m_alpha_selectors, trial_alpha_selectors, sizeof(pOpt_results5->m_alpha_selectors)); } @@ -1938,35 +2136,44 @@ static void handle_alpha_block_mode5(const color_quad_u8* pPixels, const bc7enc_ } } -static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) +static void handle_alpha_block(void *pBlock, const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) { + assert((pComp_params->m_mode_mask & (1 << 6)) || (pComp_params->m_mode_mask & (1 << 5)) || (pComp_params->m_mode_mask & (1 << 7))); + pParams->m_pSelector_weights = g_bc7_weights4; pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; pParams->m_num_selector_weights = 16; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = true; pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; bc7_optimization_results opt_results6, opt_results5, opt_results7; - color_cell_compressor_results results6; - results6.m_pSelectors = opt_results6.m_selectors; + memset(&results6, 0, sizeof(results6)); + + uint64_t best_err = UINT64_MAX; + uint32_t best_mode = 0; uint8_t selectors_temp[16]; - results6.m_pSelectors_temp = selectors_temp; - uint64_t best_err = color_cell_compression(6, pParams, &results6, pComp_params); - 
uint32_t best_mode = 6; + if (pComp_params->m_mode_mask & (1 << 6)) + { + results6.m_pSelectors = opt_results6.m_selectors; + results6.m_pSelectors_temp = selectors_temp; - if ((best_err > 0) && (pComp_params->m_use_mode5_for_alpha)) + best_err = (uint64_t)(color_cell_compression(6, pParams, &results6, pComp_params) * pComp_params->m_mode6_error_weight + .5f); + best_mode = 6; + } + + if ((best_err > 0) && (pComp_params->m_mode_mask & (1 << 5))) { uint32_t lo_a = 255, hi_a = 0; for (uint32_t i = 0; i < 16; i++) { - uint32_t a = pPixels[i].a; + uint32_t a = pPixels[i].m_c[3]; lo_a = minimumu(lo_a, a); hi_a = maximumu(hi_a, a); } @@ -1974,6 +2181,8 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const uint64_t mode5_err, mode5_alpha_err; handle_alpha_block_mode5(pPixels, pComp_params, pParams, lo_a, hi_a, &opt_results5, &mode5_err, &mode5_alpha_err); + mode5_err = (uint64_t)(mode5_err * pComp_params->m_mode5_error_weight + .5f); + if (mode5_err < best_err) { best_err = mode5_err; @@ -1981,7 +2190,7 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const } } - if ((best_err > 0) && (pComp_params->m_use_mode7_for_alpha)) + if ((best_err > 0) && (pComp_params->m_mode_mask & (1 << 7))) { const uint32_t trial_partition = estimate_partition(pPixels, pComp_params, pParams->m_weights, 7); @@ -1989,13 +2198,13 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights2x; pParams->m_num_selector_weights = 4; pParams->m_comp_bits = 5; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + pParams->m_has_alpha = true; const uint8_t* pPartition = &g_bc7_partition2[trial_partition * 16]; - color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors7[2] = { 
0, 0 }; @@ -2022,14 +2231,16 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pResults->m_pSelectors_temp = selectors_temp; uint64_t err = color_cell_compression(7, pParams, pResults, pComp_params); trial_err += err; - if (trial_err > best_err) + if ((uint64_t)(trial_err * pComp_params->m_mode7_error_weight + .5f) > best_err) break; } // subset - if (trial_err < best_err) + const uint64_t mode7_trial_err = (uint64_t)(trial_err * pComp_params->m_mode7_error_weight + .5f); + + if (mode7_trial_err < best_err) { - best_err = trial_err; + best_err = mode7_trial_err; best_mode = 7; opt_results7.m_mode = 7; opt_results7.m_partition = trial_partition; @@ -2073,43 +2284,59 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const encode_bc7_block(pBlock, &opt_results6); } + else + { + assert(0); + } } -static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) +static void handle_opaque_block(void *pBlock, const color_rgba *pPixels, const bc7enc_compress_block_params *pComp_params, color_cell_compressor_params *pParams) { + assert((pComp_params->m_mode_mask & (1 << 6)) || (pComp_params->m_mode_mask & (1 << 1))); + uint8_t selectors_temp[16]; - - // Mode 6 + bc7_optimization_results opt_results; - - pParams->m_pSelector_weights = g_bc7_weights4; - pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; - pParams->m_num_selector_weights = 16; - pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_FALSE; + + uint64_t best_err = UINT64_MAX; + pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; - pParams->m_has_alpha = BC7ENC_FALSE; - - color_cell_compressor_results results6; - results6.m_pSelectors = opt_results.m_selectors; - results6.m_pSelectors_temp = selectors_temp; - - uint64_t best_err = 
color_cell_compression(6, pParams, &results6, pComp_params); - opt_results.m_mode = 6; opt_results.m_partition = 0; - opt_results.m_low[0] = results6.m_low_endpoint; - opt_results.m_high[0] = results6.m_high_endpoint; - opt_results.m_pbits[0][0] = results6.m_pbits[0]; - opt_results.m_pbits[0][1] = results6.m_pbits[1]; opt_results.m_index_selector = 0; opt_results.m_rotation = 0; + // Mode 6 + if (pComp_params->m_mode_mask & (1 << 6)) + { + pParams->m_pSelector_weights = g_bc7_weights4; + pParams->m_pSelector_weightsx = (const vec4F*)g_bc7_weights4x; + pParams->m_num_selector_weights = 16; + pParams->m_comp_bits = 7; + pParams->m_has_pbits = true; + pParams->m_endpoints_share_pbit = false; + + // This means mode has alpha even though this is an opaque block + // so deal with the error on alpha too. + pParams->m_has_alpha = true; + + color_cell_compressor_results results6; + results6.m_pSelectors = opt_results.m_selectors; + results6.m_pSelectors_temp = selectors_temp; + + best_err = (uint64_t)(color_cell_compression(6, pParams, &results6, pComp_params) * pComp_params->m_mode6_error_weight + .5f); + + opt_results.m_mode = 6; + opt_results.m_low[0] = results6.m_low_endpoint; + opt_results.m_high[0] = results6.m_high_endpoint; + opt_results.m_pbits[0][0] = results6.m_pbits[0]; + opt_results.m_pbits[0][1] = results6.m_pbits[1]; + } + // Mode 1 - if ((best_err > 0) && (pComp_params->m_max_partitions_mode > 0)) + if ((best_err > 0) && (pComp_params->m_max_partitions > 0) && (pComp_params->m_mode_mask & (1 << 1))) { const uint32_t trial_partition = estimate_partition(pPixels, pComp_params, pParams->m_weights, 1); @@ -2117,12 +2344,13 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights3x; pParams->m_num_selector_weights = 8; pParams->m_comp_bits = 6; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_endpoints_share_pbit = BC7ENC_TRUE; + pParams->m_has_pbits = true; + 
pParams->m_endpoints_share_pbit = true; + pParams->m_has_alpha = false; const uint8_t *pPartition = &g_bc7_partition2[trial_partition * 16]; - color_quad_u8 subset_colors[2][16]; + color_rgba subset_colors[2][16]; uint32_t subset_total_colors1[2] = { 0, 0 }; @@ -2150,14 +2378,15 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons uint64_t err = color_cell_compression(1, pParams, pResults, pComp_params); trial_err += err; - if (trial_err > best_err) + if ((uint64_t)(trial_err * pComp_params->m_mode1_error_weight + .5f) > best_err) break; } // subset - if (trial_err < best_err) + const uint64_t mode1_trial_err = (uint64_t)(trial_err * pComp_params->m_mode1_error_weight + .5f); + if (mode1_trial_err < best_err) { - best_err = trial_err; + best_err = mode1_trial_err; opt_results.m_mode = 1; opt_results.m_partition = trial_partition; for (uint32_t subset = 0; subset < 2; subset++) @@ -2171,14 +2400,16 @@ static void handle_opaque_block(void *pBlock, const color_quad_u8 *pPixels, cons } } + pParams->m_has_alpha = false; + encode_bc7_block(pBlock, &opt_results); } -bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params) +bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params) { assert(g_bc7_mode_1_optimal_endpoints[255][0].m_hi != 0); - const color_quad_u8 *pPixels = (const color_quad_u8 *)(pPixelsRGBA); + const color_rgba *pPixels = (const color_rgba *)(pPixelsRGBA); color_cell_compressor_params params; if (pComp_params->m_perceptual) @@ -2193,25 +2424,133 @@ bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const b } else memcpy(params.m_weights, pComp_params->m_weights, sizeof(params.m_weights)); + + if (pComp_params->m_force_alpha) + { + handle_alpha_block(pBlock, pPixels, pComp_params, ¶ms); + return true; + } for (uint32_t i = 0; i < 16; i++) { - if (pPixels[i].a < 255) + if 
(pPixels[i].m_c[3] < 255) { handle_alpha_block(pBlock, pPixels, pComp_params, ¶ms); - return BC7ENC_TRUE; + return true; } } handle_opaque_block(pBlock, pPixels, pComp_params, ¶ms); - return BC7ENC_FALSE; + return false; } +/* +static const uint8_t g_tdefl_small_dist_extra[512] = +{ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 +}; + +static const uint8_t g_tdefl_large_dist_extra[128] = +{ + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 
11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 +}; + +static inline uint32_t compute_match_cost_estimate(uint32_t dist, uint32_t match_len_in_bytes) +{ + assert(match_len_in_bytes <= 258); + + uint32_t len_cost = 6; + if (match_len_in_bytes >= 12) + len_cost = 9; + else if (match_len_in_bytes >= 8) + len_cost = 8; + else if (match_len_in_bytes >= 6) + len_cost = 7; + + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[std::min(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; +} +*/ +class tracked_stat +{ +public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? 
sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + +private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; +}; + +/* +static inline float compute_block_max_std_dev(const color_rgba* pPixels) +{ + tracked_stat r_stats, g_stats, b_stats, a_stats; + + for (uint32_t i = 0; i < 16; i++) + { + r_stats.update(pPixels[i].m_c[0]); + g_stats.update(pPixels[i].m_c[1]); + b_stats.update(pPixels[i].m_c[2]); + a_stats.update(pPixels[i].m_c[3]); + } + + return std::max(std::max(std::max(r_stats.get_std_dev(), g_stats.get_std_dev()), b_stats.get_std_dev()), a_stats.get_std_dev()); +} +*/ +struct bc7_block +{ + uint8_t m_bytes[16]; + + uint32_t get_mode() const + { + uint32_t bc7_mode = 0; + while (((m_bytes[0] & (1 << bc7_mode)) == 0) && (bc7_mode < 8)) + bc7_mode++; + return bc7_mode; + } +}; + /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. +If you use this software in a product, attribution / credits is requested but not required. ------------------------------------------------------------------------------ ALTERNATIVE A - MIT License -Copyright(c) 2020 Richard Geldreich, Jr. +Copyright(c) 2020-2021 Richard Geldreich, Jr. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files(the "Software"), to deal in the Software without restriction, including without limitation the rights to diff --git a/libkram/bc7enc/bc7enc.h b/libkram/bc7enc/bc7enc.h index 2dbd9101..8794c15d 100644 --- a/libkram/bc7enc/bc7enc.h +++ b/libkram/bc7enc/bc7enc.h @@ -1,23 +1,22 @@ // File: bc7enc.h - Richard Geldreich, Jr. - MIT license or public domain (see end of bc7enc.c) +// If you use this software in a product, attribution / credits is requested but not required. 
#include #include - -#ifdef __cplusplus -extern "C" { -#endif +#include +#include #define BC7ENC_BLOCK_SIZE (16) -#define BC7ENC_MAX_PARTITIONS1 (64) +#define BC7ENC_MAX_PARTITIONS (64) #define BC7ENC_MAX_UBER_LEVEL (4) -typedef uint8_t bc7enc_bool; -#define BC7ENC_TRUE (1) -#define BC7ENC_FALSE (0) +struct color_rgba { uint8_t m_c[4]; }; -typedef struct +struct bc7enc_compress_block_params { - // m_max_partitions_mode may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. The higher this value, the slower the compressor, but the higher the quality. - uint32_t m_max_partitions_mode; + uint32_t m_mode_mask; + + // m_max_partitions may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS. The higher this value, the slower the compressor, but the higher the quality. + uint32_t m_max_partitions; // Relative RGBA or YCbCrA weights. uint32_t m_weights[4]; @@ -26,23 +25,58 @@ typedef struct uint32_t m_uber_level; // If m_perceptual is true, colorspace error is computed in YCbCr space, otherwise RGB. - bc7enc_bool m_perceptual; + bool m_perceptual; // Set m_try_least_squares to false for slightly faster/lower quality compression. - bc7enc_bool m_try_least_squares; + bool m_try_least_squares; - // When m_mode_partition_estimation_filterbank, the mode1 partition estimator skips lesser used partition patterns unless they are strongly predicted to be potentially useful. + // When m_mode17_partition_estimation_filterbank, the mode1 partition estimator skips lesser used partition patterns unless they are strongly predicted to be potentially useful. // There's a slight loss in quality with this enabled (around .08 dB RGB PSNR or .05 dB Y PSNR), but up to a 11% gain in speed depending on the other settings. 
- bc7enc_bool m_mode_partition_estimation_filterbank; + bool m_mode17_partition_estimation_filterbank; + + bool m_force_alpha; + + bool m_force_selectors; + uint8_t m_selectors[16]; + + bool m_quant_mode6_endpoints; + bool m_bias_mode1_pbits; - bc7enc_bool m_use_mode5_for_alpha; - bc7enc_bool m_use_mode7_for_alpha; + float m_pbit1_weight; -} bc7enc_compress_block_params; + float m_mode1_error_weight; + float m_mode5_error_weight; + float m_mode6_error_weight; + float m_mode7_error_weight; + + float m_low_frequency_partition_weight; + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void print() + { + printf("Mode mask: 0x%X\n", m_mode_mask); + printf("Max partitions: %u\n", m_max_partitions); + printf("Weights: %u %u %u %u\n", m_weights[0], m_weights[1], m_weights[2], m_weights[3]); + printf("Uber level: %u\n", m_uber_level); + printf("Perceptual: %u\n", m_perceptual); + printf("Try least squares: %u\n", m_try_least_squares); + printf("Mode 1/7 partition estimation filterbank: %u\n", m_mode17_partition_estimation_filterbank); + printf("Force alpha: %u\n", m_force_alpha); + printf("Quant mode 6 endpoints: %u\n", m_quant_mode6_endpoints); + printf("Bias mode 1 p-bits: %u\n", m_bias_mode1_pbits); + printf("p-bit 1 weight: %f\n", m_pbit1_weight); + printf("Mode error weights: %f %f %f %f\n", m_mode1_error_weight, m_mode5_error_weight, m_mode6_error_weight, m_mode7_error_weight); + printf("Low frequency partition weight: %f\n", m_low_frequency_partition_weight); + } +}; inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params *p) { - p->m_perceptual = BC7ENC_FALSE; + p->m_perceptual = false; p->m_weights[0] = 1; p->m_weights[1] = 1; p->m_weights[2] = 1; @@ -51,7 +85,7 @@ inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_blo inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress_block_params *p) { - p->m_perceptual = BC7ENC_TRUE; + p->m_perceptual = true; p->m_weights[0] = 
128; p->m_weights[1] = 64; p->m_weights[2] = 16; @@ -60,23 +94,30 @@ inline void bc7enc_compress_block_params_init_perceptual_weights(bc7enc_compress inline void bc7enc_compress_block_params_init(bc7enc_compress_block_params *p) { - p->m_max_partitions_mode = BC7ENC_MAX_PARTITIONS1; - p->m_try_least_squares = BC7ENC_TRUE; - p->m_mode_partition_estimation_filterbank = BC7ENC_TRUE; + p->m_mode_mask = UINT32_MAX; + p->m_max_partitions = BC7ENC_MAX_PARTITIONS; + p->m_try_least_squares = true; + p->m_mode17_partition_estimation_filterbank = true; p->m_uber_level = 0; - p->m_use_mode5_for_alpha = BC7ENC_TRUE; - p->m_use_mode7_for_alpha = BC7ENC_TRUE; + p->m_force_selectors = false; + p->m_force_alpha = false; + p->m_quant_mode6_endpoints = false; + p->m_bias_mode1_pbits = false; + p->m_pbit1_weight = 1.0f; + p->m_mode1_error_weight = 1.0f; + p->m_mode5_error_weight = 1.0f; + p->m_mode6_error_weight = 1.0f; + p->m_mode7_error_weight = 1.0f; + p->m_low_frequency_partition_weight = 1.0f; bc7enc_compress_block_params_init_perceptual_weights(p); } // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). void bc7enc_compress_block_init(); -// Packs a single block of 4x4=16 RGBA pixels (R first in memory) to 128-bit BC7 block pBlock, using either mode 1 and/or 6. +// Packs a single block of 16x16 RGBA pixels (R first in memory) to 128-bit BC7 block pBlock, using either mode 1 and/or 6. // Alpha blocks will always use mode 6, and by default opaque blocks will use either modes 1 or 6. -// Returns BC7ENC_TRUE if the block had any pixels with alpha < 255, otherwise it return BC7ENC_FALSE. (This is not an error code - a block is always encoded.) -bc7enc_bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params); +// Returns true if the block had any pixels with alpha < 255, otherwise it return false. (This is not an error code - a block is always encoded.) 
+bool bc7enc_compress_block(void *pBlock, const void *pPixelsRGBA, const bc7enc_compress_block_params *pComp_params); + -#ifdef __cplusplus -} -#endif diff --git a/libkram/bc7enc/ert.cpp b/libkram/bc7enc/ert.cpp new file mode 100644 index 00000000..6fc2459d --- /dev/null +++ b/libkram/bc7enc/ert.cpp @@ -0,0 +1,705 @@ +#include "ert.h" +#include +#include +#include +#include "utils.h" + +#define ERT_FAVOR_CONT_AND_REP0_MATCHES (1) +#define ERT_FAVOR_REP0_MATCHES (0) + +namespace ert +{ + const uint32_t MAX_BLOCK_PIXELS = 12 * 12; + const uint32_t MAX_BLOCK_SIZE_IN_BYTES = 256; + const uint32_t MIN_MATCH_LEN = 3; + const float LITERAL_BITS = 13.0f; + const float MATCH_CONTINUE_BITS = 1.0f; + const float MATCH_REP0_BITS = 4.0f; + + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } + + static const uint8_t g_tdefl_small_dist_extra[512] = + { + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static const uint8_t g_tdefl_large_dist_extra[128] = + { + 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 + }; + + static inline uint32_t compute_match_cost_estimate(uint32_t dist, uint32_t match_len_in_bytes) + { + assert(match_len_in_bytes <= 258); + + uint32_t len_cost = 6; + if (match_len_in_bytes >= 12) + len_cost = 9; + else if (match_len_in_bytes >= 8) + len_cost = 8; + else if (match_len_in_bytes >= 6) + len_cost = 7; + + uint32_t dist_cost = 5; + if (dist < 512) + dist_cost += g_tdefl_small_dist_extra[dist & 511]; + else + { + dist_cost += g_tdefl_large_dist_extra[std::min(dist, 32767) >> 8]; + while (dist >= 32768) + { + dist_cost++; + dist >>= 1; + } + } + return len_cost + dist_cost; + } + + class tracked_stat + { + public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) 
{ update(val); return *this; } + + uint32_t get_number_of_values() { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; }; + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + float get_variance() const { float s = get_std_dev(); return s * s; } + + private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; + }; + + static inline float compute_block_max_std_dev(const color_rgba* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps) + { + tracked_stat comp_stats[4]; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba* pPixel = pPixels + x + y * block_width; + + for (uint32_t c = 0; c < num_comps; c++) + comp_stats[c].update(pPixel->m_c[c]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + return max_std_dev; + } + + static inline float compute_block_mse(const color_rgba* pPixelsA, const color_rgba* pPixelsB, uint32_t block_width, uint32_t block_height, uint32_t total_block_pixels, uint32_t num_comps, const uint32_t weights[4], float one_over_total_color_weight) + { + uint64_t total_err = 0; + + if ((block_width == 4) && (block_height == 4) && (num_comps == 4)) + { + if ((weights[0] == 1) && (weights[1] == 1) && (weights[2] == 1) && (weights[3] == 1)) + { + for (uint32_t i = 0; i < 16; i++) + { + const color_rgba* pA = pPixelsA + i; + const color_rgba* pB = pPixelsB + i; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + const int da = pA->m_c[3] - pB->m_c[3]; + + total_err += dr * dr + dg * dg + db * db + da * da; + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const 
color_rgba* pA = pPixelsA + i; + const color_rgba* pB = pPixelsB + i; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + const int da = pA->m_c[3] - pB->m_c[3]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg + weights[2] * db * db + weights[3] * da * da; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 3)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + const int db = pA->m_c[2] - pB->m_c[2]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg + weights[2] * db * db; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 2)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + const int dg = pA->m_c[1] - pB->m_c[1]; + + total_err += weights[0] * dr * dr + weights[1] * dg * dg; + } + } + } + else if ((block_width == 4) && (block_height == 4) && (num_comps == 1)) + { + for (uint32_t y = 0; y < 4; y++) + { + const uint32_t y_ofs = y * 4; + for (uint32_t x = 0; x < 4; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + const int dr = pA->m_c[0] - pB->m_c[0]; + + total_err += weights[0] * dr * dr; + } + } + } + else + { + for (uint32_t y = 0; y < block_height; y++) + { + const uint32_t y_ofs = y * block_width; + for (uint32_t x = 0; x < block_width; x++) + { + const color_rgba* pA = pPixelsA + x + y_ofs; + const color_rgba* pB = pPixelsB + x + y_ofs; + + for (uint32_t c = 0; c < num_comps; c++) + { + const int d = 
pA->m_c[c] - pB->m_c[c]; + total_err += weights[c] * d * d; + } + } + } + } + + return total_err * (one_over_total_color_weight / total_block_pixels); + } + + uint32_t hash_hsieh(const uint8_t* pBuf, size_t len, uint32_t salt) + { + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len + (salt << 16)); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; + } + + // BC7 entropy reduction transform with Deflate/LZMA/LZHAM optimizations + bool reduce_entropy(void* pBlocks, uint32_t num_blocks, + uint32_t total_block_stride_in_bytes, uint32_t block_size_to_optimize_in_bytes, uint32_t block_width, uint32_t block_height, uint32_t num_comps, + const color_rgba* pBlock_pixels, const reduce_entropy_params& params, uint32_t& total_modified, + pUnpack_block_func pUnpack_block_func, void* pUnpack_block_func_user_data, + vector* pBlock_mse_scales) + { + assert(total_block_stride_in_bytes && block_size_to_optimize_in_bytes); + assert(total_block_stride_in_bytes >= block_size_to_optimize_in_bytes); + + assert(num_comps >= 1 && num_comps <= 4); + for (uint32_t i = num_comps; i < 4; i++) + { + assert(!params.m_color_weights[i]); + if (params.m_color_weights[i]) + return false; + } + + const uint32_t total_color_weight = params.m_color_weights[0] + params.m_color_weights[1] + params.m_color_weights[2] + 
params.m_color_weights[3]; + assert(total_color_weight); + const float one_over_total_color_weight = 1.0f / total_color_weight; + + assert((block_size_to_optimize_in_bytes >= MIN_MATCH_LEN) && (block_size_to_optimize_in_bytes <= MAX_BLOCK_SIZE_IN_BYTES)); + if ((block_size_to_optimize_in_bytes < MIN_MATCH_LEN) || (block_size_to_optimize_in_bytes > MAX_BLOCK_SIZE_IN_BYTES)) + return false; + + uint8_t* pBlock_bytes = (uint8_t*)pBlocks; + + const uint32_t total_block_pixels = block_width * block_height; + if (total_block_pixels > MAX_BLOCK_PIXELS) + return false; + + const int total_blocks_to_check = std::max(1U, params.m_lookback_window_size / total_block_stride_in_bytes); + + vector len_hist(MAX_BLOCK_SIZE_IN_BYTES + 1); + vector second_len_hist(MAX_BLOCK_SIZE_IN_BYTES + 1); + uint32_t total_second_matches = 0; + + int prev_match_window_ofs_to_favor_cont = -1, prev_match_dist_to_favor = -1; + + uint32_t total_smooth_blocks = 0; + + const uint32_t HASH_SIZE = 8192; + uint32_t hash[HASH_SIZE]; + + for (uint32_t block_index = 0; block_index < num_blocks; block_index++) + { + if ((block_index & 0xFF) == 0) + memset(hash, 0, sizeof(hash)); + + uint8_t* pOrig_block = &pBlock_bytes[block_index * total_block_stride_in_bytes]; + const color_rgba* pPixels = &pBlock_pixels[block_index * total_block_pixels]; + + color_rgba decoded_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(pOrig_block, decoded_block, block_index, pUnpack_block_func_user_data)) + return false; + + float cur_mse = compute_block_mse(pPixels, decoded_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if ((params.m_skip_zero_mse_blocks) && (cur_mse == 0.0f)) + continue; + + const float max_std_dev = compute_block_max_std_dev(pPixels, block_width, block_height, num_comps); + + float yl = clampf(max_std_dev / params.m_max_smooth_block_std_dev, 0.0f, 1.0f); + yl = yl * yl; + float smooth_block_mse_scale = 
lerp(params.m_smooth_block_max_mse_scale, 1.0f, yl); + + if (pBlock_mse_scales) + { + if ((*pBlock_mse_scales)[block_index] > 0.0f) + { + smooth_block_mse_scale = (*pBlock_mse_scales)[block_index]; + } + } + + if (smooth_block_mse_scale > 1.0f) + total_smooth_blocks++; + + float cur_bits = (LITERAL_BITS * block_size_to_optimize_in_bytes); + float cur_t = cur_mse * smooth_block_mse_scale + cur_bits * params.m_lambda; + + int first_block_to_check = std::max(0, block_index - total_blocks_to_check); + int last_block_to_check = block_index - 1; + + uint8_t best_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(best_block, pOrig_block, block_size_to_optimize_in_bytes); + + float best_t = cur_t; + uint32_t best_match_len = 0, best_match_src_window_ofs = 0, best_match_dst_window_ofs = 0, best_match_src_block_ofs = 0, best_match_dst_block_ofs = 0; + float best_match_bits = 0; + + // Don't let thresh_ms_err be 0 to let zero error blocks have slightly increased distortion + const float thresh_ms_err = params.m_max_allowed_rms_increase_ratio * params.m_max_allowed_rms_increase_ratio * std::max(cur_mse, 1.0f); + + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const uint8_t* pPrev_blk = &pBlock_bytes[prev_block_index * total_block_stride_in_bytes]; + + for (uint32_t len = block_size_to_optimize_in_bytes; len >= MIN_MATCH_LEN; len--) + { + if (params.m_allow_relative_movement) + { + for (uint32_t src_ofs = 0; src_ofs <= (block_size_to_optimize_in_bytes - len); src_ofs++) + { + assert(len + src_ofs <= block_size_to_optimize_in_bytes); + + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + src_ofs; + + for (uint32_t dst_ofs = 0; dst_ofs <= (block_size_to_optimize_in_bytes - len); dst_ofs++) + { + assert(len + dst_ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + dst_ofs; + + const uint32_t match_dist = 
dst_match_window_ofs - src_match_window_ofs; + + float trial_match_bits, trial_total_bits; + + uint32_t hs = hash_hsieh(pPrev_blk + src_ofs, len, dst_ofs); + +#if ERT_FAVOR_CONT_AND_REP0_MATCHES + // Continue a previous match (which would cross block boundaries) + if (((int)src_match_window_ofs == prev_match_window_ofs_to_favor_cont) && (dst_ofs == 0)) + { + trial_match_bits = MATCH_CONTINUE_BITS; + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_CONTINUE_BITS; + } + // Exploit REP0 matches + else if ((prev_match_dist_to_favor != -1) && (src_match_window_ofs == (dst_match_window_ofs - prev_match_dist_to_favor))) + { + trial_match_bits = MATCH_REP0_BITS; + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_REP0_BITS; + } + else + { + trial_match_bits = (float)compute_match_cost_estimate(match_dist, len); + trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + trial_match_bits; + + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } + } +#else + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } +#endif + + hash[hs & (HASH_SIZE - 1)] = (hs & 0xFFFFFF00) | (block_index & 0xFF); + + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, pOrig_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + dst_ofs, pPrev_blk + src_ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, 
one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_match_len = len; + best_match_src_window_ofs = src_match_window_ofs; + best_match_dst_window_ofs = dst_match_window_ofs; + best_match_src_block_ofs = src_ofs; + best_match_dst_block_ofs = dst_ofs; + best_match_bits = trial_match_bits; + } + } + + } // dst_ofs + } // src_ofs + } + else + { + const uint32_t match_dist = (block_index - prev_block_index) * total_block_stride_in_bytes; + + // Assume the block has 1 match and block_size_to_optimize_in_bytes-match_len literals. + const float trial_match_bits = (float)compute_match_cost_estimate(match_dist, len); + const float trial_total_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + trial_match_bits; + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + for (uint32_t ofs = 0; ofs <= (block_size_to_optimize_in_bytes - len); ofs++) + { + assert(len + ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + ofs; + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + ofs; + + float trial_match_bits_to_use = trial_match_bits; + float trial_total_bits_times_lambda_to_use = trial_total_bits_times_lambda; + + uint32_t hs = hash_hsieh(pPrev_blk + ofs, len, ofs); + +#if ERT_FAVOR_CONT_AND_REP0_MATCHES + // Continue a previous match (which would cross block boundaries) + if (((int)src_match_window_ofs == prev_match_window_ofs_to_favor_cont) && (ofs == 0)) + { + float continue_match_trial_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_CONTINUE_BITS; + trial_match_bits_to_use = MATCH_CONTINUE_BITS; + trial_total_bits_times_lambda_to_use = continue_match_trial_bits * params.m_lambda; + } + // Exploit REP0 
matches + else if ((prev_match_dist_to_favor != -1) && (src_match_window_ofs == (dst_match_window_ofs - prev_match_dist_to_favor))) + { + float continue_match_trial_bits = (block_size_to_optimize_in_bytes - len) * LITERAL_BITS + MATCH_REP0_BITS; + trial_match_bits_to_use = MATCH_REP0_BITS; + trial_total_bits_times_lambda_to_use = continue_match_trial_bits * params.m_lambda; + } + else + { + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } + } +#else + uint32_t hash_check = hash[hs & (HASH_SIZE - 1)]; + if ((hash_check & 0xFF) == (block_index & 0xFF)) + { + if ((hash_check >> 8) == (hs >> 8)) + continue; + } +#endif + + hash[hs & (HASH_SIZE - 1)] = (hs & 0xFFFFFF00) | (block_index & 0xFF); + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, pOrig_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + ofs, pPrev_blk + ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda_to_use; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_match_len = len; + best_match_src_window_ofs = src_match_window_ofs; + best_match_dst_window_ofs = dst_match_window_ofs; + best_match_src_block_ofs = ofs; + best_match_dst_block_ofs = ofs; + best_match_bits = trial_match_bits_to_use; + } + } + } // ofs + } + + } // len + + } // prev_block_index + + if (best_t < cur_t) + { + uint32_t best_second_match_len = 0, best_second_match_src_window_ofs = 0, 
best_second_match_dst_window_ofs = 0, best_second_match_src_block_ofs = 0, best_second_match_dst_block_ofs = 0; + + // Try injecting a second match, being sure it does't overlap with the first. + if ((params.m_try_two_matches) && (best_match_len <= (block_size_to_optimize_in_bytes - 3))) + { + uint8_t matched_flags[MAX_BLOCK_SIZE_IN_BYTES]; + memset(matched_flags, 0, sizeof(matched_flags)); + memset(matched_flags + best_match_dst_block_ofs, 1, best_match_len); + + uint8_t orig_best_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(orig_best_block, best_block, block_size_to_optimize_in_bytes); + + for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) + { + const uint8_t* pPrev_blk = &pBlock_bytes[prev_block_index * total_block_stride_in_bytes]; + + const uint32_t match_dist = (block_index - prev_block_index) * total_block_stride_in_bytes; + + for (uint32_t len = 3; len <= (block_size_to_optimize_in_bytes - best_match_len); len++) + { + const float trial_total_bits = (block_size_to_optimize_in_bytes - len - best_match_len) * LITERAL_BITS + compute_match_cost_estimate(match_dist, len) + best_match_bits; + + const float trial_total_bits_times_lambda = trial_total_bits * params.m_lambda; + + for (uint32_t ofs = 0; ofs <= (block_size_to_optimize_in_bytes - len); ofs++) + { + int i; + for (i = 0; i < (int)len; i++) + if (matched_flags[ofs + i]) + break; + if (i != (int)len) + continue; + + assert(len + ofs <= block_size_to_optimize_in_bytes); + + const uint32_t dst_match_window_ofs = block_index * total_block_stride_in_bytes + ofs; + const uint32_t src_match_window_ofs = prev_block_index * total_block_stride_in_bytes + ofs; + + uint8_t trial_block[MAX_BLOCK_SIZE_IN_BYTES]; + memcpy(trial_block, orig_best_block, block_size_to_optimize_in_bytes); + memcpy(trial_block + ofs, pPrev_blk + ofs, len); + + color_rgba decoded_trial_block[MAX_BLOCK_PIXELS]; + if (!(*pUnpack_block_func)(trial_block, decoded_trial_block, block_index, 
pUnpack_block_func_user_data)) + continue; + + float trial_mse = compute_block_mse(pPixels, decoded_trial_block, block_width, block_height, total_block_pixels, num_comps, params.m_color_weights, one_over_total_color_weight); + + if (trial_mse < thresh_ms_err) + { + float t = trial_mse * smooth_block_mse_scale + trial_total_bits_times_lambda; + + if (t < best_t) + { + best_t = t; + memcpy(best_block, trial_block, block_size_to_optimize_in_bytes); + best_second_match_len = len; + best_second_match_src_window_ofs = src_match_window_ofs; + best_second_match_dst_window_ofs = dst_match_window_ofs; + best_second_match_src_block_ofs = ofs; + best_second_match_dst_block_ofs = ofs; + } + } + } + } + } + } + + memcpy(pOrig_block, best_block, block_size_to_optimize_in_bytes); + total_modified++; + + if ((best_second_match_len == 0) || (best_match_dst_window_ofs > best_second_match_dst_window_ofs)) + { + int best_match_dist = best_match_dst_window_ofs - best_match_src_window_ofs; + assert(best_match_dist >= 1); + (void)best_match_dist; + + if (block_size_to_optimize_in_bytes == total_block_stride_in_bytes) + { + // If the match goes all the way to the end of a block, we can try to continue it on the next encoded block. + if ((best_match_dst_block_ofs + best_match_len) == total_block_stride_in_bytes) + prev_match_window_ofs_to_favor_cont = best_match_src_window_ofs + best_match_len; + else + prev_match_window_ofs_to_favor_cont = -1; + } + +#if ERT_FAVOR_REP0_MATCHES + // Compute the window offset where a cheaper REP0 match would be available + prev_match_dist_to_favor = best_match_dist; +#endif + } + else + { + int best_match_dist = best_second_match_dst_window_ofs - best_second_match_src_window_ofs; + assert(best_match_dist >= 1); + (void)best_match_dist; + + if (block_size_to_optimize_in_bytes == total_block_stride_in_bytes) + { + // If the match goes all the way to the end of a block, we can try to continue it on the next encoded block. 
+ if ((best_second_match_dst_block_ofs + best_second_match_len) == total_block_stride_in_bytes) + prev_match_window_ofs_to_favor_cont = best_second_match_src_window_ofs + best_second_match_len; + else + prev_match_window_ofs_to_favor_cont = -1; + } + +#if ERT_FAVOR_REP0_MATCHES + // Compute the window offset where a cheaper REP0 match would be available + prev_match_dist_to_favor = best_match_dist; +#endif + } + + len_hist[best_match_len]++; + + if (best_second_match_len) + { + second_len_hist[best_second_match_len]++; + total_second_matches++; + } + } + else + { + prev_match_window_ofs_to_favor_cont = -1; + } + + } // block_index + + if (params.m_debug_output) + { + printf("Total smooth blocks: %3.2f%%\n", total_smooth_blocks * 100.0f / num_blocks); + + printf("Match length histogram:\n"); + for (uint32_t i = MIN_MATCH_LEN; i <= block_size_to_optimize_in_bytes; i++) + printf("%u%c", len_hist[i], (i < block_size_to_optimize_in_bytes) ? ',' : '\n'); + + printf("Total second matches: %u %3.2f%%\n", total_second_matches, total_second_matches * 100.0f / num_blocks); + printf("Secod match length histogram:\n"); + for (uint32_t i = MIN_MATCH_LEN; i <= block_size_to_optimize_in_bytes; i++) + printf("%u%c", second_len_hist[i], (i < block_size_to_optimize_in_bytes) ? ',' : '\n'); + } + + return true; + } + +} // namespace ert + diff --git a/libkram/bc7enc/ert.h b/libkram/bc7enc/ert.h new file mode 100644 index 00000000..03268a1e --- /dev/null +++ b/libkram/bc7enc/ert.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +//#include +//#include +//#include + +namespace ert +{ + using namespace STL_NAMESPACE; + + struct color_rgba { uint8_t m_c[4]; }; + + struct reduce_entropy_params + { + // m_lambda: The post-processor tries to reduce distortion*smooth_block_scale + rate*lambda (rate is approximate LZ bits and distortion is scaled MS error multiplied against the smooth block MSE weighting factor). 
+ // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. + float m_lambda; + + // m_lookback_window_size: The number of bytes the encoder can look back from each block to find matches. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. + uint32_t m_lookback_window_size; + + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. + float m_max_allowed_rms_increase_ratio; + + float m_max_smooth_block_std_dev; + float m_smooth_block_max_mse_scale; + + uint32_t m_color_weights[4]; + + bool m_try_two_matches; + bool m_allow_relative_movement; + bool m_skip_zero_mse_blocks; + bool m_debug_output; + + reduce_entropy_params() { clear(); } + + void clear() + { + m_lookback_window_size = 256; + m_lambda = 1.0f; + m_max_allowed_rms_increase_ratio = 10.0f; + m_max_smooth_block_std_dev = 18.0f; + m_smooth_block_max_mse_scale = 10.0f; + m_color_weights[0] = 1; + m_color_weights[1] = 1; + m_color_weights[2] = 1; + m_color_weights[3] = 1; + m_try_two_matches = false; + m_allow_relative_movement = false; + m_skip_zero_mse_blocks = false; + m_debug_output = false; + } + + void print() + { + printf("lambda: %f\n", m_lambda); + printf("Lookback window size: %u\n", m_lookback_window_size); + printf("Max allowed RMS increase ratio: %f\n", m_max_allowed_rms_increase_ratio); + printf("Max smooth block std dev: %f\n", m_max_smooth_block_std_dev); + printf("Smooth block max MSE scale: %f\n", m_smooth_block_max_mse_scale); + printf("Color weights: %u %u %u %u\n", m_color_weights[0], m_color_weights[1], m_color_weights[2], m_color_weights[3]); + printf("Try two matches: %u\n", m_try_two_matches); + printf("Allow relative movement: %u\n", m_allow_relative_movement); + printf("Skip zero MSE blocks: %u\n", m_skip_zero_mse_blocks); + } + }; + + 
typedef bool (*pUnpack_block_func)(const void* pBlock, color_rgba* pPixels, uint32_t block_index, void* pUser_data); + + // BC7 entropy reduction transform with Deflate/LZMA/LZHAM optimizations + bool reduce_entropy(void* pBlocks, uint32_t num_blocks, + uint32_t total_block_stride_in_bytes, uint32_t block_size_to_optimize_in_bytes, uint32_t block_width, uint32_t block_height, uint32_t num_comps, + const color_rgba* pBlock_pixels, const reduce_entropy_params& params, uint32_t& total_modified, + pUnpack_block_func pUnpack_block_func, void* pUnpack_block_func_user_data, + vector* pBlock_mse_scales = nullptr); + +} // namespace ert diff --git a/libkram/bc7enc/rdo_bc_encoder.cpp b/libkram/bc7enc/rdo_bc_encoder.cpp new file mode 100644 index 00000000..8f51f8e4 --- /dev/null +++ b/libkram/bc7enc/rdo_bc_encoder.cpp @@ -0,0 +1,1270 @@ +// rdo_bc_encoder.cpp +#include "rdo_bc_encoder.h" + +#define RGBCX_IMPLEMENTATION +#include "rgbcx.h" + +#define DECODE_BC4_TO_GRAYSCALE (0) + +#ifdef _MSC_VER +#pragma warning (disable: 4127) // conditional expression is constant +#endif + +namespace rdo_bc +{ + using namespace utils; + + static const char* get_dxgi_format_string(DXGI_FORMAT fmt) + { + switch (fmt) + { + case DXGI_FORMAT_BC1_UNORM: return "BC1_UNORM"; + case DXGI_FORMAT_BC4_UNORM: return "BC4_UNORM"; + case DXGI_FORMAT_BC3_UNORM: return "BC3_UNORM"; + case DXGI_FORMAT_BC5_UNORM: return "BC5_UNORM"; + case DXGI_FORMAT_BC7_UNORM: return "BC7_UNORM"; + default: break; + } + return "?"; + } + + static vector compute_block_mse_scales(const image_u8& source_image, uint32_t blocks_x, uint32_t blocks_y, uint32_t total_blocks, bool rdo_debug_output) + { + const float ULTRASMOOTH_BLOCK_STD_DEV_THRESHOLD = 2.9f; + const float DARK_THRESHOLD = 13.0f; + const float BRIGHT_THRESHOLD = 222.0f; + const float ULTRAMOOTH_BLOCK_MSE_SCALE = 120.0f; + const uint32_t ULTRASMOOTH_REGION_TOO_SMALL_THRESHOLD = 64; + + image_u8 ultrasmooth_blocks_vis(blocks_x, blocks_y); + + for (uint32_t by = 0; by 
< blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + color_quad_u8 block_pixels[16]; + source_image.get_block(bx, by, 4, 4, block_pixels); + + tracked_stat y_stats; + for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + { + int l = block_pixels[x + y * 4].get_luma(); + y_stats.update(l); + } + + float max_std_dev = compute_block_max_std_dev((color_quad_u8*)block_pixels, 4, 4, 3); + + float yl = max_std_dev / ULTRASMOOTH_BLOCK_STD_DEV_THRESHOLD; + + yl = std::clamp(yl, 0.0f, 1.0f); + yl *= yl; + + float y_avg = y_stats.get_mean(); + + if ((y_avg < DARK_THRESHOLD) || (y_avg >= BRIGHT_THRESHOLD)) + yl = 1.0f; + + int k = std::min((int)(yl * 255.0f + .5f), 255); + + ultrasmooth_blocks_vis.fill_box(bx, by, 1, 1, color_quad_u8((uint8_t)k, 255)); + } + } + + for (int pass = 0; pass < 1; pass++) + { + image_u8 next_vis(ultrasmooth_blocks_vis); + + for (int y = 0; y < (int)blocks_y; y++) + { + for (int x = 0; x < (int)blocks_x; x++) + { + int m = 0; + + for (int dy = -1; dy <= 1; dy++) + for (int dx = -1; dx <= 1; dx++) + { + if (ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r == 255) + m = std::max(m, ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r); + } + + next_vis(x, y).set((uint8_t)m, 255); + } + } + + ultrasmooth_blocks_vis.swap(next_vis); + } + + for (uint32_t pass = 0; pass < 32; pass++) + { + image_u8 next_vis(ultrasmooth_blocks_vis); + for (int y = 0; y < (int)blocks_y; y++) + { + for (int x = 0; x < (int)blocks_x; x++) + { + if (ultrasmooth_blocks_vis.get_clamped(x, y).r < 255) + { + int m = 0; + + for (int dy = -1; dy <= 1; dy++) + for (int dx = -1; dx <= 1; dx++) + if (ultrasmooth_blocks_vis.get_clamped(x + dx, y + dy).r == 255) + m++; + + if (m >= 5) + next_vis.set_pixel_clipped(x, y, color_quad_u8(255, 255, 255, 255)); + } + } + } + ultrasmooth_blocks_vis.swap(next_vis); + } + + image_u8 orig_ultrasmooth_blocks_vis(ultrasmooth_blocks_vis); + + if (rdo_debug_output) + { + 
save_png("ultrasmooth_block_mask_pre_filter.png", ultrasmooth_blocks_vis, false); + } + + for (uint32_t by = 0; by < blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + const bool is_ultrasmooth = ultrasmooth_blocks_vis(bx, by).r == 0; + if (!is_ultrasmooth) + continue; + + vector filled_pixels; + filled_pixels.reserve(256); + + uint32_t total_set_pixels = ultrasmooth_blocks_vis.flood_fill(bx, by, color_quad_u8(255, 255, 255, 255), color_quad_u8(0, 0, 0, 255), &filled_pixels); + + if (total_set_pixels < ULTRASMOOTH_REGION_TOO_SMALL_THRESHOLD) + { + for (uint32_t i = 0; i < filled_pixels.size(); i++) + orig_ultrasmooth_blocks_vis(filled_pixels[i].m_x, filled_pixels[i].m_y) = color_quad_u8(255, 255, 255, 255); + } + + } // bx + } // by + + ultrasmooth_blocks_vis = orig_ultrasmooth_blocks_vis; + + if (rdo_debug_output) + { + save_png("ultrasmooth_block_mask.png", ultrasmooth_blocks_vis, false); + } + + vector block_mse_scales(total_blocks); + + uint32_t total_ultrasmooth_blocks = 0; + for (uint32_t by = 0; by < blocks_y; by++) + { + for (uint32_t bx = 0; bx < blocks_x; bx++) + { + const bool is_ultrasmooth = ultrasmooth_blocks_vis(bx, by).r == 0; + + block_mse_scales[bx + by * blocks_x] = is_ultrasmooth ? 
ULTRAMOOTH_BLOCK_MSE_SCALE : -1.0f; + + total_ultrasmooth_blocks += is_ultrasmooth; + } + } + + if (rdo_debug_output) + printf("Total ultrasmooth blocks: %3.2f%%\n", total_ultrasmooth_blocks * 100.0f / total_blocks); + + return block_mse_scales; + } + + rdo_bc_encoder::rdo_bc_encoder() : + m_pOrig_source_image(nullptr), + m_orig_width(0), + m_orig_height(0), + m_blocks_x(0), + m_blocks_y(0), + m_total_blocks(0), + m_bytes_per_block(0), + m_pixel_format_bpp(0), + m_total_texels(0), + m_has_alpha(false) + { + } + + void rdo_bc_encoder::clear() + { + m_pOrig_source_image = nullptr; + + m_source_image.clear(); + + m_params.clear(); + + m_orig_width = 0; + m_orig_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_total_blocks = 0; + m_bytes_per_block = 0; + m_pixel_format_bpp = 0; + m_total_texels = 0; + m_has_alpha = false; + + m_packed_image8.clear(); + m_packed_image16.clear(); + + m_prerdo_packed_image8.clear(); + m_prerdo_packed_image16.clear(); + + m_bc7enc_pack_params.clear(); +#if SUPPORT_BC7E + memset(&m_bc7e_pack_params, 0, sizeof(m_bc7e_pack_params)); +#endif + } + + bool rdo_bc_encoder::init(const utils::image_u8& src_image, rdo_bc_params& params) + { + clear(); + + m_pOrig_source_image = &src_image; + m_params = params; + + init_encoders(); + + if (!init_source_image()) + return false; + + return true; + } + + bool rdo_bc_encoder::encode() + { + if (!m_packed_image8.size() && !m_packed_image16.size()) + return false; + + if (!init_encoder_params()) + return false; + + if (!encode_texture()) + return false; + + if (!postprocess_rdo()) + return false; + + return true; + } + + void rdo_bc_encoder::init_encoders() + { + rgbcx::init(m_params.m_bc1_mode); + bc7enc_compress_block_init(); +#if SUPPORT_BC7E + ispc::bc7e_compress_block_init(); +#endif + } + + bool rdo_bc_encoder::init_encoder_params() + { + bc7enc_compress_block_params_init(&m_bc7enc_pack_params); + if (!m_params.m_perceptual) + 
bc7enc_compress_block_params_init_linear_weights(&m_bc7enc_pack_params); + m_bc7enc_pack_params.m_max_partitions = m_params.m_bc7enc_max_partitions_to_scan; + m_bc7enc_pack_params.m_uber_level = std::min(BC7ENC_MAX_UBER_LEVEL, m_params.m_bc7_uber_level); + + if (m_params.m_bc7enc_mode6_only) + m_bc7enc_pack_params.m_mode_mask = 1 << 6; + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_rdo_lambda > 0.0f)) + { + // Slam off perceptual in RDO mode - we don't support it (too slow). + m_params.m_perceptual = false; + m_bc7enc_pack_params.m_perceptual = false; + bc7enc_compress_block_params_init_linear_weights(&m_bc7enc_pack_params); + } + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_bc7enc_reduce_entropy)) + { + // Configure the BC7 encoder with some decent parameters for later RDO post-processing. + // Textures with alpha are harder for BC7 to handle, so we use more conservative defaults. + + m_bc7enc_pack_params.m_mode17_partition_estimation_filterbank = false; + + if (m_params.m_bc7enc_rdo_bc7_weight_modes) + { + // Weight modes 5 and especially 6 more highly than the other modes. + if (m_has_alpha) + { + m_bc7enc_pack_params.m_mode5_error_weight = .7f; + m_bc7enc_pack_params.m_mode6_error_weight = .6f; + } + else + { + m_bc7enc_pack_params.m_mode6_error_weight = .4f; + } + } + + if (m_params.m_bc7enc_rdo_bc7_weight_low_frequency_partitions) + { + // Slightly prefer the lower frequency partition patterns. + m_bc7enc_pack_params.m_low_frequency_partition_weight = .9999f; + } + + if (m_params.m_bc7enc_rdo_bc7_quant_mode6_endpoints) + { + // As a good default, don't quantize mode 6 endpoints if the texture has alpha. This isn't required, but helps mask textures. + //if (!has_alpha) + m_bc7enc_pack_params.m_quant_mode6_endpoints = true; + } + + if (m_params.m_bc7enc_rdo_bc7_pbit1_weighting) + { + // Favor p-bit 0 vs. 
1, to slightly lower the entropy of output blocks with p-bits + m_bc7enc_pack_params.m_pbit1_weight = 1.3f; + } + } + +#if SUPPORT_BC7E + // Now initialize the BC7 compressor's parameters. + + memset(&m_bc7e_pack_params, 0, sizeof(m_bc7e_pack_params)); + switch (m_params.m_bc7_uber_level) + { + case 0: + ispc::bc7e_compress_block_params_init_ultrafast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 1: + ispc::bc7e_compress_block_params_init_veryfast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 2: + ispc::bc7e_compress_block_params_init_fast(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 3: + ispc::bc7e_compress_block_params_init_basic(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 4: + ispc::bc7e_compress_block_params_init_slow(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 5: + ispc::bc7e_compress_block_params_init_veryslow(&m_bc7e_pack_params, m_params.m_perceptual); + break; + case 6: + default: + ispc::bc7e_compress_block_params_init_slowest(&m_bc7e_pack_params, m_params.m_perceptual); + break; + } +#endif + + if (m_params.m_status_output) + { + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + if ((SUPPORT_BC7E) && (m_params.m_use_bc7e)) + printf("bc7e.ispc uber level: %u, perceptual: %u\n", m_params.m_bc7_uber_level, m_params.m_perceptual); + else + { + printf("\nbc7enc parameters:\n"); + m_bc7enc_pack_params.print(); + } + } + else + { + printf("BC1 level: %u, use 3-color mode: %u, use 3-color mode for black: %u, bc1_mode: %u\n", + m_params.m_bc1_quality_level, m_params.m_use_bc1_3color_mode, m_params.m_use_bc1_3color_mode_for_black, (int)m_params.m_bc1_mode); + } + + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC4_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC5_UNORM)) + { + printf("Use high quality BC4 block encoder: %u, BC4 block radius: %u, use 6 value mode: %u, use 8 value mode: %u\n", + m_params.m_use_hq_bc345, 
m_params.m_bc345_search_rad, (m_params.m_bc345_mode_mask & 2) != 0, (m_params.m_bc345_mode_mask & 1) != 0); + } + + printf("\nrdo_bc_params:\n"); + printf(" Perceptual: %u\n", m_params.m_perceptual); + printf(" Y Flip: %u\n", m_params.m_y_flip); + printf(" DXGI format: 0x%X %s\n", m_params.m_dxgi_format, get_dxgi_format_string(m_params.m_dxgi_format)); + + printf("BC1-5 parameters:\n"); + printf(" BC45 channels: %u %u\n", m_params.m_bc45_channel0, m_params.m_bc45_channel1); + printf(" BC1 approximation mode: %u\n", (int)m_params.m_bc1_mode); + printf(" Use BC1 3-color mode: %u\n", m_params.m_use_bc1_3color_mode); + printf(" Use BC1 3-color mode for black: %u\n", m_params.m_use_bc1_3color_mode_for_black); + printf(" BC1 quality level: %u\n", m_params.m_bc1_quality_level); + printf(" Use HQ BC345: %u\n", m_params.m_use_hq_bc345); + printf(" BC345 search radius: %u\n", m_params.m_bc345_search_rad); + printf(" BC345 mode mask: 0x%X\n", m_params.m_bc345_mode_mask); + + printf("BC7 parameters:\n"); + printf(" Use bc7e: %u\n", m_params.m_use_bc7e); + printf(" BC7 uber level: %u\n", m_params.m_bc7_uber_level); + + printf("RDO parameters:\n"); + printf(" Lambda: %f\n", m_params.m_rdo_lambda); + printf(" Lookback window size: %u\n", m_params.m_lookback_window_size); + printf(" Custom lookback window size: %u\n", m_params.m_custom_lookback_window_size); + printf(" Try 2 matches: %u\n", m_params.m_rdo_try_2_matches); + printf(" Smooth block error scale: %f\n", m_params.m_rdo_smooth_block_error_scale); + printf(" Custom RDO smooth block error scale: %u\n", m_params.m_custom_rdo_smooth_block_error_scale); + printf(" Max smooth block std dev: %f\n", m_params.m_rdo_max_smooth_block_std_dev); + printf(" Allow relative movement: %u\n", m_params.m_rdo_allow_relative_movement); + printf(" Ultrasmooth block handling: %u\n", m_params.m_rdo_ultrasmooth_block_handling); + printf(" Multithreading: %u, max threads: %u\n", m_params.m_rdo_multithreading, m_params.m_rdo_max_threads); + + 
printf("bc7enc parameters:\n"); + printf(" Mode 6 only: %u\n", m_params.m_bc7enc_mode6_only); + printf(" Max partitions to scan: %u\n", m_params.m_bc7enc_max_partitions_to_scan); + printf(" Quant mode 6 endpoints: %u\n", m_params.m_bc7enc_rdo_bc7_quant_mode6_endpoints); + printf(" Weight modes: %u\n", m_params.m_bc7enc_rdo_bc7_weight_modes); + printf(" Weight low freq partitions: %u\n", m_params.m_bc7enc_rdo_bc7_weight_low_frequency_partitions); + printf(" P-bit1 weighting: %u\n", m_params.m_bc7enc_rdo_bc7_pbit1_weighting); + printf(" Reduce entropy mode: %u\n", m_params.m_bc7enc_reduce_entropy); + printf("\n"); + } + + return true; + } + + bool rdo_bc_encoder::init_source_image() + { + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + case DXGI_FORMAT_BC4_UNORM: + m_pixel_format_bpp = 4; + break; + case DXGI_FORMAT_BC3_UNORM: + case DXGI_FORMAT_BC5_UNORM: + case DXGI_FORMAT_BC7_UNORM: + m_pixel_format_bpp = 8; + break; + default: + return false; + } + + m_bytes_per_block = (16 * m_pixel_format_bpp) / 8; + assert((m_bytes_per_block == 8) || (m_bytes_per_block == 16)); + + m_source_image = *m_pOrig_source_image; + + m_orig_width = m_source_image.width(); + m_orig_height = m_source_image.height(); + + if (m_params.m_y_flip) + { + utils::image_u8 temp; + temp.init(m_orig_width, m_orig_height); + + for (uint32_t y = 0; y < m_orig_height; y++) + for (uint32_t x = 0; x < m_orig_width; x++) + temp(x, (m_orig_height - 1) - y) = m_source_image(x, y); + + temp.swap(m_source_image); + } + + m_source_image.crop_dup_borders((m_source_image.width() + 3) & ~3, (m_source_image.height() + 3) & ~3); + + m_blocks_x = m_source_image.width() / 4; + m_blocks_y = m_source_image.height() / 4; + m_total_blocks = m_blocks_x * m_blocks_y; + m_total_texels = m_total_blocks * 16; + + bool has_alpha = false; + for (int by = 0; by < ((int)m_blocks_y) && !has_alpha; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 pixels[16]; + 
m_source_image.get_block(bx, by, 4, 4, pixels); + + for (uint32_t i = 0; i < 16; i++) + { + if (pixels[i].m_c[3] < 255) + { + has_alpha = true; + break; + } + } + } + } + + if (m_pixel_format_bpp == 8) + m_packed_image16.resize(m_total_blocks); + else + m_packed_image8.resize(m_total_blocks); + + return true; + } + + bool rdo_bc_encoder::encode_texture() + { + clock_t start_t = clock(); + + uint32_t bc7_mode_hist[8]; + memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist)); + +#if SUPPORT_BC7E + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) && (m_params.m_use_bc7e)) + { + if (m_params.m_status_output) + printf("Using bc7e: "); + +#pragma omp parallel for + for (int32_t by = 0; by < static_cast(m_blocks_y); by++) + { + // Process 64 blocks at a time, for efficient SIMD processing. + // Ideally, N >= 8 (or more) and (N % 8) == 0. + const int N = 64; + + for (uint32_t bx = 0; bx < m_blocks_x; bx += N) + { + const uint32_t num_blocks_to_process = std::min(m_blocks_x - bx, N); + + color_quad_u8 pixels[16 * N]; + + // Extract num_blocks_to_process 4x4 pixel blocks from the source image and put them into the pixels[] array. + for (uint32_t b = 0; b < num_blocks_to_process; b++) + m_source_image.get_block(bx + b, by, 4, 4, pixels + b * 16); + + // Compress the blocks to BC7. + // Note: If you've used Intel's ispc_texcomp, the input pixels are different. BC7E requires a pointer to an array of 16 pixels for each block. 
+ block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + ispc::bc7e_compress_blocks(num_blocks_to_process, reinterpret_cast(pBlock), reinterpret_cast(pixels), &m_bc7e_pack_params); + } + + if (m_params.m_status_output) + { + if ((by & 63) == 0) + printf("."); + } + } + + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + uint32_t mode = ((uint8_t*)pBlock)[0]; + for (uint32_t m = 0; m <= 7; m++) + { + if (mode & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + } + } + } + else +#endif + { +#pragma omp parallel for + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 pixels[16]; + + m_source_image.get_block(bx, by, 4, 4, pixels); + + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + { + block8* pBlock = &m_packed_image8[bx + by * m_blocks_x]; + + rgbcx::encode_bc1(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0], m_params.m_use_bc1_3color_mode, m_params.m_use_bc1_3color_mode_for_black); + break; + } + case DXGI_FORMAT_BC3_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + rgbcx::encode_bc3_hq(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0], m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc3(m_params.m_bc1_quality_level, pBlock, &pixels[0].m_c[0]); + break; + } + case DXGI_FORMAT_BC4_UNORM: + { + block8* pBlock = &m_packed_image8[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + rgbcx::encode_bc4_hq(pBlock, &pixels[0].m_c[m_params.m_bc45_channel0], 4, m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc4(pBlock, &pixels[0].m_c[m_params.m_bc45_channel0], 4); + break; + } + case DXGI_FORMAT_BC5_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + if (m_params.m_use_hq_bc345) + 
rgbcx::encode_bc5_hq(pBlock, &pixels[0].m_c[0], m_params.m_bc45_channel0, m_params.m_bc45_channel1, 4, m_params.m_bc345_search_rad, m_params.m_bc345_mode_mask); + else + rgbcx::encode_bc5(pBlock, &pixels[0].m_c[0], m_params.m_bc45_channel0, m_params.m_bc45_channel1, 4); + break; + } + case DXGI_FORMAT_BC7_UNORM: + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + bc7enc_compress_block(pBlock, pixels, &m_bc7enc_pack_params); + +#pragma omp critical + { + uint32_t mode = ((uint8_t*)pBlock)[0]; + for (uint32_t m = 0; m <= 7; m++) + { + if (mode & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + } + + break; + } + default: + { + assert(0); + break; + } + } + } + + if (m_params.m_status_output) + { + if ((by & 127) == 0) + printf("."); + } + } + } + + clock_t end_t = clock(); + + if (m_params.m_status_output) + { + printf("\nTotal encoding time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC); + + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + printf("BC7 mode histogram:\n"); + for (uint32_t i = 0; i < 8; i++) + printf("%u: %u\n", i, bc7_mode_hist[i]); + } + } + + return true; + } + + bool rdo_bc_encoder::postprocess_rdo() + { + m_prerdo_packed_image8 = m_packed_image8; + m_prerdo_packed_image16 = m_packed_image16; + + // Post-process the data with Rate Distortion Optimization + if (m_params.m_rdo_lambda <= 0.0f) + return true; + + const uint32_t MIN_RDO_MULTITHREADING_BLOCKS = 4096; + const int rdo_total_threads = (m_params.m_rdo_multithreading && (m_params.m_rdo_max_threads > 1) && (m_total_blocks >= MIN_RDO_MULTITHREADING_BLOCKS)) ? m_params.m_rdo_max_threads : 1; + + if (m_params.m_status_output) + printf("rdo_total_threads: %u\n", rdo_total_threads); + + int blocks_remaining = m_total_blocks, cur_block_index = 0; + vector blocks_to_do(rdo_total_threads), first_block_index(rdo_total_threads); + for (int p = 0; p < rdo_total_threads; p++) + { + const int num_blocks = (p == (rdo_total_threads - 1)) ? 
blocks_remaining : (m_total_blocks / rdo_total_threads); + + blocks_to_do[p] = num_blocks; + first_block_index[p] = cur_block_index; + + cur_block_index += num_blocks; + blocks_remaining -= num_blocks; + } + + assert(!blocks_remaining && cur_block_index == (int)m_total_blocks); + + ert::reduce_entropy_params ert_p; + + ert_p.m_lambda = m_params.m_rdo_lambda; + ert_p.m_lookback_window_size = m_params.m_lookback_window_size; + ert_p.m_smooth_block_max_mse_scale = m_params.m_rdo_smooth_block_error_scale; + ert_p.m_max_smooth_block_std_dev = m_params.m_rdo_max_smooth_block_std_dev; + ert_p.m_debug_output = m_params.m_rdo_debug_output; + ert_p.m_try_two_matches = m_params.m_rdo_try_2_matches; + ert_p.m_allow_relative_movement = m_params.m_rdo_allow_relative_movement; + ert_p.m_skip_zero_mse_blocks = false; + + vector block_rgb_mse_scales(compute_block_mse_scales(m_source_image, m_blocks_x, m_blocks_y, m_total_blocks, m_params.m_rdo_debug_output)); + + vector block_pixels(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + m_source_image.get_block(bx, by, 4, 4, (color_quad_u8*)&block_pixels[(bx + by * m_blocks_x) * 16]); + + unpacker_funcs block_unpackers; + block_unpackers.m_allow_3color_mode = m_params.m_use_bc1_3color_mode; + block_unpackers.m_use_bc1_3color_mode_for_black = m_params.m_use_bc1_3color_mode_for_black; + block_unpackers.m_mode = m_params.m_bc1_mode; + + if (m_params.m_dxgi_format == DXGI_FORMAT_BC7_UNORM) + { + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + // BC7 RDO + const uint32_t NUM_COMPONENTS = 4; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). 
+ ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy(&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + 16, 16, 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc7_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? 
&local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u %3.2f%%\n", total_modified, total_modified * 100.0f / m_total_blocks); + + uint32_t bc7_mode_hist[8]; + memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist)); + + for (int by = 0; by < (int)m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + block16* pBlock = &m_packed_image16[bx + by * m_blocks_x]; + + const uint32_t mode_byte = ((uint8_t*)pBlock)[0]; + + uint32_t m; + for (m = 0; m <= 7; m++) + { + if (mode_byte & (1 << m)) + { + bc7_mode_hist[m]++; + break; + } + } + assert(m != 8); + } + } + + printf("BC7 mode histogram:\n"); + for (uint32_t i = 0; i < 8; i++) + printf("%u: %u\n", i, bc7_mode_hist[i]); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC5_UNORM) + { + // BC5 RDO - One BC4 block for R followed by one BC4 block for G + + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + vector block_pixels_r(m_total_blocks * 16), block_pixels_g(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 orig_block[16]; + m_source_image.get_block(bx, by, 4, 4, orig_block); + + color_quad_u8* pDst_block_r = (color_quad_u8*)&block_pixels_r[(bx + by * m_blocks_x) * 16]; + color_quad_u8* pDst_block_g = (color_quad_u8*)&block_pixels_g[(bx + by * m_blocks_x) * 16]; + + for (uint32_t i = 0; i < 16; i++) + { + pDst_block_r[i].set(orig_block[i].r, 0, 0, 0); + pDst_block_g[i].set(orig_block[i].g, 0, 0, 0); + } + } + } + + const uint32_t NUM_COMPONENTS = 1; + + ert_p.m_color_weights[1] = 0; + ert_p.m_color_weights[2] = 0; + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // 
Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). + ert_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified_r = 0, total_modified_g = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local_r = 0, total_modified_local_g = 0; + + ert::reduce_entropy(&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + 2 * sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels_r[16 * first_block_to_encode], ert_p, total_modified_local_r, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode] + sizeof(rgbcx::bc4_block), num_blocks_to_encode, + 2 * sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels_g[16 * first_block_to_encode], ert_p, total_modified_local_g, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + +#pragma omp critical + { + total_modified_r += total_modified_local_r; + total_modified_g += total_modified_local_g; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified R: %u %3.2f%%\n", 
total_modified_r, total_modified_r * 100.0f / m_total_blocks); + printf("Total blocks modified G: %u %3.2f%%\n", total_modified_g, total_modified_g * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC4_UNORM) + { + // BC4 RDO - One BC4 block for R + + const uint32_t NUM_COMPONENTS = 1; + + ert_p.m_color_weights[1] = 0; + ert_p.m_color_weights[2] = 0; + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // Attempt to compute a decent conservative smooth block MSE max scaling factor. + // No single smooth block scale setting can work for all textures (unless it's ridiuclously large, killing efficiency). + ert_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + if (m_params.m_status_output) + { + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + } + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + ert::reduce_entropy(&m_packed_image8[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc4_block), sizeof(rgbcx::bc4_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u 
%3.2f%%\n", total_modified, total_modified * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) + { + // BC1 RDO - One BC1 block + const uint32_t NUM_COMPONENTS = 3; + + ert_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // This is just a hack - no single setting can work for all textures. + ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 8.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override)\n", ert_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + printf("\nERT parameters:\n"); + ert_p.print(); + printf("\n"); + + uint32_t total_modified = 0; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local = 0; + + vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy(&m_packed_image8[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc1_block), sizeof(rgbcx::bc1_block), 4, 4, NUM_COMPONENTS, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local, + unpacker_funcs::unpack_bc1_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? 
&local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified += total_modified_local; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total blocks modified: %u %3.2f%%\n", + total_modified, total_modified * 100.0f / m_total_blocks); + } + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) + { + // BC3 RDO - One BC4 block followed by one BC1 block + + ert_p.m_lookback_window_size = std::max(16U, m_params.m_lookback_window_size); + + vector block_pixels_a(m_total_blocks * 16); + + for (uint32_t by = 0; by < m_blocks_y; by++) + { + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + color_quad_u8 orig_block[16]; + m_source_image.get_block(bx, by, 4, 4, orig_block); + + color_quad_u8* pDst_block_a = (color_quad_u8*)&block_pixels_a[(bx + by * m_blocks_x) * 16]; + for (uint32_t i = 0; i < 16; i++) + pDst_block_a[i].set(orig_block[i].a, 0, 0, 0); + } + } + + ert_p.m_color_weights[3] = 0; + + ert::reduce_entropy_params ert_alpha_p(ert_p); + ert_alpha_p.m_color_weights[1] = 0; + ert_alpha_p.m_color_weights[2] = 0; + ert_alpha_p.m_color_weights[3] = 0; + + if (!m_params.m_custom_rdo_smooth_block_error_scale) + { + // This is just a hack - no single setting can work for all textures. 
+ ert_p.m_smooth_block_max_mse_scale = lerp(15.0f, 50.0f, std::min(1.0f, ert_p.m_lambda / 8.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f (use -zb# to override) for RGB\n", ert_p.m_smooth_block_max_mse_scale); + + ert_alpha_p.m_smooth_block_max_mse_scale = lerp(10.0f, 30.0f, std::min(1.0f, ert_alpha_p.m_lambda / 4.0f)); + + if (m_params.m_status_output) + printf("Using an automatically computed smooth block error scale of %f for Alpha\n", ert_alpha_p.m_smooth_block_max_mse_scale); + } + + for (uint32_t by = 0; by < m_blocks_y; by++) + for (uint32_t bx = 0; bx < m_blocks_x; bx++) + { + float& s = block_rgb_mse_scales[bx + by * m_blocks_x]; + if (s > 0.0f) + s = std::max(ert_p.m_smooth_block_max_mse_scale, s * std::min(ert_p.m_lambda, 3.0f)); + } + + if (m_params.m_status_output) + { + printf("\nERT RGB parameters:\n"); + ert_p.print(); + + printf("\nERT Alpha parameters:\n"); + ert_alpha_p.print(); + printf("\n"); + } + + uint32_t total_modified_rgb = 0, total_modified_alpha = 0; + + block_unpackers.m_allow_3color_mode = false; + block_unpackers.m_use_bc1_3color_mode_for_black = false; + + clock_t rdo_start_t = clock(); + +#pragma omp parallel for + for (int p = 0; p < rdo_total_threads; p++) + { + const int first_block_to_encode = first_block_index[p]; + const int num_blocks_to_encode = blocks_to_do[p]; + if (!num_blocks_to_encode) + continue; + + uint32_t total_modified_local_rgb = 0, total_modified_local_alpha = 0; + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode], num_blocks_to_encode, + sizeof(rgbcx::bc1_block) * 2, sizeof(rgbcx::bc4_block), 4, 4, 1, + (ert::color_rgba*)&block_pixels_a[16 * first_block_to_encode], ert_alpha_p, total_modified_local_alpha, + unpacker_funcs::unpack_bc4_block, &block_unpackers); + + vector local_block_rgb_mse_scales(num_blocks_to_encode); + for (int i = 0; i < num_blocks_to_encode; i++) + local_block_rgb_mse_scales[i] = 
block_rgb_mse_scales[first_block_to_encode + i]; + + ert::reduce_entropy((uint8_t*)&m_packed_image16[first_block_to_encode] + sizeof(rgbcx::bc1_block), num_blocks_to_encode, + sizeof(rgbcx::bc1_block) * 2, sizeof(rgbcx::bc1_block), 4, 4, 3, + (ert::color_rgba*)&block_pixels[16 * first_block_to_encode], ert_p, total_modified_local_rgb, + unpacker_funcs::unpack_bc1_block, &block_unpackers, + m_params.m_rdo_ultrasmooth_block_handling ? &local_block_rgb_mse_scales : nullptr); + +#pragma omp critical + { + total_modified_rgb += total_modified_local_rgb; + total_modified_alpha += total_modified_local_alpha; + } + } // p + + clock_t rdo_end_t = clock(); + + if (m_params.m_status_output) + { + printf("Total RDO time: %f secs\n", (double)(rdo_end_t - rdo_start_t) / CLOCKS_PER_SEC); + + printf("Total RGB blocks modified: %u %3.2f%%\n", total_modified_rgb, total_modified_rgb * 100.0f / m_total_blocks); + printf("Total Alpha blocks modified: %u %3.2f%%\n", total_modified_alpha, total_modified_alpha * 100.0f / m_total_blocks); + } + } + + return true; + } + + bool rdo_bc_encoder::unpack_blocks(image_u8& unpacked_image) const + { + unpacked_image.init(get_blocks_x() * 4, get_blocks_y() * 4); + + bool bc1_punchthrough_flag = false; + bool used_bc1_transparent_texels_for_black = false; + + bool unpack_failed = false; + +#pragma omp parallel for + for (int by = 0; by < (int)get_blocks_y(); by++) + { + for (uint32_t bx = 0; bx < get_blocks_x(); bx++) + { + const void* pBlock = (const uint8_t*)get_blocks() + (bx + by * get_blocks_x()) * get_bytes_per_block(); + + color_quad_u8 unpacked_pixels[16]; + for (uint32_t i = 0; i < 16; i++) + unpacked_pixels[i].set(0, 0, 0, 255); + + switch (m_params.m_dxgi_format) + { + case DXGI_FORMAT_BC1_UNORM: + { + const bool used_punchthrough = rgbcx::unpack_bc1(pBlock, unpacked_pixels, true, m_params.m_bc1_mode); + + if (used_punchthrough) + { + bc1_punchthrough_flag = true; + + const rgbcx::bc1_block* pBC1_block = (const rgbcx::bc1_block*)pBlock; + 
+ for (uint32_t y = 0; y < 4; y++) + for (uint32_t x = 0; x < 4; x++) + if (pBC1_block->get_selector(x, y) == 3) + used_bc1_transparent_texels_for_black = true; + } + + break; + } + case DXGI_FORMAT_BC3_UNORM: + { + if (!rgbcx::unpack_bc3(pBlock, unpacked_pixels, m_params.m_bc1_mode)) + bc1_punchthrough_flag = true; + break; + } + case DXGI_FORMAT_BC4_UNORM: + { + rgbcx::unpack_bc4(pBlock, &unpacked_pixels[0][0], 4); + +#if DECODE_BC4_TO_GRAYSCALE + for (uint32_t i = 0; i < 16; i++) + { + unpacked_pixels[i][1] = unpacked_pixels[i][0]; + unpacked_pixels[i][2] = unpacked_pixels[i][0]; + } +#endif + break; + } + case DXGI_FORMAT_BC5_UNORM: + { + rgbcx::unpack_bc5(pBlock, &unpacked_pixels[0][0], 0, 1, 4); + break; + } + case DXGI_FORMAT_BC7_UNORM: + { + if (!bc7decomp::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels)) + { + fprintf(stderr, "bc7decomp::unpack_bc7() failed!\n"); + unpack_failed = true; + } + + // Now unpack the block using the non-SSE reference decoder, to make sure we get the same exact unpacked bits. 
+ color_quad_u8 unpacked_pixels_ref[16]; + if (!bc7decomp_ref::unpack_bc7((const uint8_t*)pBlock, (bc7decomp::color_rgba*)unpacked_pixels_ref)) + { + fprintf(stderr, "bc7decomp::unpack_bc7_ref() failed!\n"); + unpack_failed = true; + } + + if (memcmp(unpacked_pixels, unpacked_pixels_ref, sizeof(unpacked_pixels)) != 0) + { + fprintf(stderr, "BC7 unpack verification failed!\n"); + unpack_failed = true; + } + + break; + } + default: + assert(0); + break; + } + + unpacked_image.set_block(bx, by, 4, 4, unpacked_pixels); + } // bx + } // by + + if (unpack_failed) + return false; + + // Sanity check the BC1/BC3 output + if (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM) + { + if (bc1_punchthrough_flag) + fprintf(stderr, "WARNING: BC3 mode selected, but rgbcx::unpack_bc3() returned one or more blocks using 3-color mode!\n"); + } + else if (m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) + { + if ((bc1_punchthrough_flag) && (!m_params.m_use_bc1_3color_mode)) + fprintf(stderr, "WARNING: BC1 output used 3-color mode, when this was disabled!\n"); + + if ((used_bc1_transparent_texels_for_black) && (!m_params.m_use_bc1_3color_mode_for_black)) // compare against the encoder setting; the old test negated the flag itself and could never fire + fprintf(stderr, "WARNING: BC1 output used the transparent selector for black, when this was disabled!\n"); + } + + if (m_params.m_status_output) + { + if ((m_params.m_dxgi_format == DXGI_FORMAT_BC1_UNORM) || (m_params.m_dxgi_format == DXGI_FORMAT_BC3_UNORM)) + printf("Output used 3-color mode: %u, output used transparent texels for black: %u\n", bc1_punchthrough_flag, used_bc1_transparent_texels_for_black); + } + + return true; + } + +} // namespace rdo_bc diff --git a/libkram/bc7enc/rdo_bc_encoder.h b/libkram/bc7enc/rdo_bc_encoder.h new file mode 100644 index 00000000..469211e1 --- /dev/null +++ b/libkram/bc7enc/rdo_bc_encoder.h @@ -0,0 +1,269 @@ +// rdo_bc_encoder.h +#pragma once + +#ifndef SUPPORT_BC7E +#define SUPPORT_BC7E 0 +#endif + +#include "utils.h" +#include "ert.h" + +#include "bc7decomp.h" +#include "rgbcx.h" + +#include
"bc7enc.h" + +#if SUPPORT_BC7E +#include "bc7e_ispc.h" +#endif + +//#include "dds_defs.h" + +// TODO: code below doesn't handle srgb case +enum DXGI_FORMAT +{ + DXGI_FORMAT_BC1_UNORM = 71, + DXGI_FORMAT_BC1_UNORM_SRGB = 72, + DXGI_FORMAT_BC2_UNORM = 74, + DXGI_FORMAT_BC2_UNORM_SRGB = 75, + DXGI_FORMAT_BC3_UNORM = 77, + DXGI_FORMAT_BC3_UNORM_SRGB = 78, + DXGI_FORMAT_BC4_UNORM = 80, + DXGI_FORMAT_BC4_SNORM = 81, + DXGI_FORMAT_BC5_UNORM = 83, + DXGI_FORMAT_BC5_SNORM = 84, + DXGI_FORMAT_BC6H_UF16 = 95, + DXGI_FORMAT_BC6H_SF16 = 96, + DXGI_FORMAT_BC7_UNORM = 98, + DXGI_FORMAT_BC7_UNORM_SRGB = 99, +}; + +namespace rdo_bc +{ + + struct rdo_bc_params + { + rdo_bc_params() + { + clear(); + } + + void clear() + { + m_bc7_uber_level = 6; // BC7ENC_MAX_UBER_LEVEL; + m_bc7enc_max_partitions_to_scan = BC7ENC_MAX_PARTITIONS; + m_perceptual = false; + m_y_flip = false; + m_bc45_channel0 = 0; + m_bc45_channel1 = 1; + + m_bc1_mode = rgbcx::bc1_approx_mode::cBC1Ideal; + m_use_bc1_3color_mode = true; + + // We're just turning this on by default now, like NVDXT.EXE used to do back in the old original Xbox days. 
+ m_use_bc1_3color_mode_for_black = true; // false; + + m_bc1_quality_level = rgbcx::MAX_LEVEL; + + m_dxgi_format = DXGI_FORMAT_BC7_UNORM; + + m_rdo_lambda = 0.0f; + m_rdo_debug_output = false; + m_rdo_smooth_block_error_scale = 15.0f; + m_custom_rdo_smooth_block_error_scale = false; + m_lookback_window_size = 128; + m_custom_lookback_window_size = false; + m_bc7enc_rdo_bc7_quant_mode6_endpoints = true; + m_bc7enc_rdo_bc7_weight_modes = true; + m_bc7enc_rdo_bc7_weight_low_frequency_partitions = true; + m_bc7enc_rdo_bc7_pbit1_weighting = true; + m_rdo_max_smooth_block_std_dev = 18.0f; + m_rdo_allow_relative_movement = false; + m_rdo_try_2_matches = true; + m_rdo_ultrasmooth_block_handling = true; + + m_use_hq_bc345 = true; + m_bc345_search_rad = 5; + m_bc345_mode_mask = rgbcx::BC4_USE_ALL_MODES; + + m_bc7enc_mode6_only = false; + m_rdo_multithreading = true; + + m_bc7enc_reduce_entropy = false; + + m_use_bc7e = false; + +#if SUPPORT_BC7E + // By default, if they've compiled in BC7E.ispc, then use that. In a rate distortion sense it's better overall. 
+ // https://richg42.blogspot.com/2021/02/average-rate-distortion-curves-for.html + m_use_bc7e = true; +#endif + + m_status_output = false; + + m_rdo_max_threads = 128; + } + + int m_bc7_uber_level; + int m_bc7enc_max_partitions_to_scan; + bool m_perceptual; + bool m_y_flip; + uint32_t m_bc45_channel0; + uint32_t m_bc45_channel1; + + rgbcx::bc1_approx_mode m_bc1_mode; + bool m_use_bc1_3color_mode; + + bool m_use_bc1_3color_mode_for_black; + + int m_bc1_quality_level; + + DXGI_FORMAT m_dxgi_format; + + float m_rdo_lambda; + bool m_rdo_debug_output; + float m_rdo_smooth_block_error_scale; + bool m_custom_rdo_smooth_block_error_scale; + uint32_t m_lookback_window_size; + bool m_custom_lookback_window_size; + bool m_bc7enc_rdo_bc7_quant_mode6_endpoints; + bool m_bc7enc_rdo_bc7_weight_modes; + bool m_bc7enc_rdo_bc7_weight_low_frequency_partitions; + bool m_bc7enc_rdo_bc7_pbit1_weighting; + float m_rdo_max_smooth_block_std_dev; + bool m_rdo_allow_relative_movement; + bool m_rdo_try_2_matches; + bool m_rdo_ultrasmooth_block_handling; + + bool m_use_hq_bc345; + int m_bc345_search_rad; + uint32_t m_bc345_mode_mask; + + bool m_bc7enc_mode6_only; + bool m_rdo_multithreading; + + bool m_bc7enc_reduce_entropy; + + bool m_use_bc7e; + bool m_status_output; + + uint32_t m_rdo_max_threads; + }; + + class rdo_bc_encoder + { + public: + rdo_bc_encoder(); + + void clear(); + + bool init(const utils::image_u8& src_image, rdo_bc_params& params); + bool encode(); + + const rdo_bc_params &get_params() const { return m_params; } + + const utils::image_u8* get_orig_source_image() const { return m_pOrig_source_image; } + const utils::image_u8& get_source_image() const { return m_source_image; } + + const void* get_prerdo_blocks() const { return m_prerdo_packed_image8.size() ? (void*)m_prerdo_packed_image8.data() : (void*)m_prerdo_packed_image16.data(); } + const void* get_blocks() const { return m_packed_image8.size() ? 
(void*)m_packed_image8.data() : (void*)m_packed_image16.data(); } + + bool unpack_blocks(utils::image_u8& unpacked_image) const; + + DXGI_FORMAT get_pixel_format() const { return m_params.m_dxgi_format; } + + uint32_t get_orig_width() const { return m_orig_width; } + uint32_t get_orig_height() const { return m_orig_height; } + uint32_t get_blocks_x() const { return m_blocks_x; } + uint32_t get_blocks_y() const { return m_blocks_y; } + uint32_t get_total_blocks() const { return m_total_blocks; } + uint32_t get_total_blocks_size_in_bytes() const { return m_total_blocks * m_bytes_per_block; } + uint32_t get_bytes_per_block() const { return m_bytes_per_block; } + uint32_t get_pixel_format_bpp() const { return m_pixel_format_bpp; } + uint32_t get_total_texels() const { return m_total_texels; } + bool get_has_alpha() const { return m_has_alpha; } + + private: + const utils::image_u8* m_pOrig_source_image; + utils::image_u8 m_source_image; + rdo_bc_params m_params; + + uint32_t m_orig_width, m_orig_height; + uint32_t m_blocks_x, m_blocks_y, m_total_blocks, m_bytes_per_block, m_pixel_format_bpp; + uint32_t m_total_texels; + bool m_has_alpha; + + utils::block8_vec m_packed_image8; + utils::block16_vec m_packed_image16; + + utils::block8_vec m_prerdo_packed_image8; + utils::block16_vec m_prerdo_packed_image16; + + bc7enc_compress_block_params m_bc7enc_pack_params; +#if SUPPORT_BC7E + ispc::bc7e_compress_block_params m_bc7e_pack_params; +#endif + + void init_encoders(); + bool init_source_image(); + bool init_encoder_params(); + bool encode_texture(); + + struct unpacker_funcs + { + rgbcx::bc1_approx_mode m_mode; + bool m_allow_3color_mode; + bool m_use_bc1_3color_mode_for_black; + + static bool unpack_bc1_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + const unpacker_funcs* pState = (const unpacker_funcs*)pUser_data; + + bool used_3color_mode = rgbcx::unpack_bc1(pBlock, pPixels, true, pState->m_mode); + + 
if (used_3color_mode) + { + if (!pState->m_allow_3color_mode) + return false; + + if (!pState->m_use_bc1_3color_mode_for_black) + { + const rgbcx::bc1_block* pBC1_block = (const rgbcx::bc1_block*)pBlock; // read-only view: pBlock is const and only get_selector() is called + + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + if (pBC1_block->get_selector(x, y) == 3) + return false; + } // x + } // y + } + } + + return true; + } + + // TODO: Enforce 6/8 color constraints + static bool unpack_bc4_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + (void)pUser_data; + memset(pPixels, 0, sizeof(ert::color_rgba) * 16); + rgbcx::unpack_bc4(pBlock, (uint8_t*)pPixels, 4); + return true; + } + + static bool unpack_bc7_block(const void* pBlock, ert::color_rgba* pPixels, uint32_t block_index, void* pUser_data) + { + (void)block_index; + (void)pUser_data; + return bc7decomp::unpack_bc7(pBlock, (bc7decomp::color_rgba*)pPixels); + } + }; + + bool postprocess_rdo(); + }; + +} // namespace rdo_bc diff --git a/libkram/bc7enc/rgbcx.cpp b/libkram/bc7enc/rgbcx.cpp new file mode 100644 index 00000000..12ff023f --- /dev/null +++ b/libkram/bc7enc/rgbcx.cpp @@ -0,0 +1,3085 @@ +// rgbcx.cpp - see license at end of rgbcx.h +#include "rgbcx.h" +#include +#include +//#include + +namespace rgbcx +{ + //const uint8_t g_bc1_to_linear[4] = { 0, 3, 1, 2 }; + + const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS4 = 969; + +#ifdef _MSC_VER +#pragma region +#endif + // All total orderings for 16 pixels 2-bit selectors. + // BC1 selector order 0, 2, 3, 1 (i.e. the selectors are reordered into linear order).
+ static uint8_t g_unique_total_orders4[NUM_UNIQUE_TOTAL_ORDERINGS4][4] = + { + {0,8,2,6},{4,3,9,0},{4,8,1,3},{12,0,3,1},{11,3,2,0},{6,4,6,0},{7,5,0,4},{6,0,8,2},{1,0,0,15},{3,0,8,5},{1,1,13,1},{13,1,2,0},{0,14,1,1},{0,15,1,0},{0,13,0,3},{16,0,0,0},{4,3,4,5},{8,6,0,2},{0,10,0,6},{10,0,4,2},{7,2,1,6},{4,7,5,0},{1,4,7,4},{0,14,2,0},{2,7,2,5},{9,0,5,2},{9,2,2,3},{10,0,5,1},{2,3,7,4},{4,9,0,3},{1,5,0,10},{1,1,6,8}, + {6,6,4,0},{11,5,0,0},{11,2,0,3},{4,0,10,2},{2,3,10,1},{1,13,1,1},{0,14,0,2},{2,3,3,8},{12,3,1,0},{14,0,0,2},{9,1,3,3},{6,4,0,6},{1,1,5,9},{5,9,0,2},{2,10,1,3},{12,0,0,4},{4,6,6,0},{0,6,4,6},{3,7,4,2},{0,13,3,0},{3,10,0,3},{10,2,1,3},{1,12,1,2},{2,0,13,1},{11,0,5,0},{12,1,3,0},{6,4,5,1},{10,4,2,0},{3,6,1,6},{7,3,6,0},{10,4,0,2},{10,0,2,4}, + {0,5,9,2},{0,9,3,4},{6,4,2,4},{3,4,7,2},{3,3,5,5},{4,2,9,1},{6,2,8,0},{3,5,3,5},{4,10,1,1},{10,1,3,2},{5,7,0,4},{5,3,7,1},{6,8,1,1},{8,8,0,0},{11,1,0,4},{14,1,0,1},{9,3,2,2},{8,2,1,5},{0,0,2,14},{3,3,9,1},{10,1,5,0},{8,3,1,4},{1,5,8,2},{6,1,9,0},{3,2,1,10},{3,11,1,1},{7,6,3,0},{9,0,3,4},{5,2,5,4},{0,2,3,11},{15,0,0,1},{0,6,6,4}, + {3,4,9,0},{4,7,0,5},{0,4,4,8},{0,13,2,1},{2,4,1,9},{3,2,5,6},{10,6,0,0},{3,5,6,2},{8,0,4,4},{1,3,6,6},{7,7,0,2},{6,1,4,5},{0,11,1,4},{2,2,8,4},{0,1,2,13},{15,0,1,0},{7,2,6,1},{8,1,7,0},{1,8,4,3},{2,13,1,0},{1,0,7,8},{14,2,0,0},{1,8,1,6},{9,3,3,1},{0,0,7,9},{4,4,1,7},{9,0,6,1},{10,2,4,0},{1,7,3,5},{0,3,8,5},{5,2,4,5},{1,2,5,8}, + {0,8,7,1},{10,3,2,1},{12,0,4,0},{2,1,4,9},{5,2,2,7},{1,9,3,3},{15,1,0,0},{6,3,4,3},{9,5,0,2},{1,6,9,0},{6,6,0,4},{13,2,1,0},{5,1,8,2},{0,5,11,0},{7,1,0,8},{1,2,12,1},{0,3,3,10},{7,4,2,3},{5,1,4,6},{7,0,3,6},{3,12,0,1},{3,4,5,4},{1,10,0,5},{7,4,3,2},{10,5,0,1},{13,3,0,0},{2,5,4,5},{3,10,1,2},{5,1,2,8},{14,0,1,1},{1,5,4,6},{1,4,5,6}, + 
{2,3,11,0},{11,0,4,1},{11,2,2,1},{5,3,8,0},{1,3,10,2},{0,1,13,2},{3,1,4,8},{4,2,4,6},{1,5,6,4},{2,1,11,2},{1,2,9,4},{4,7,3,2},{6,2,5,3},{7,2,2,5},{8,1,4,3},{3,2,8,3},{12,1,0,3},{7,8,1,0},{7,0,2,7},{5,10,0,1},{0,2,14,0},{2,9,3,2},{7,0,0,9},{11,1,4,0},{10,4,1,1},{2,2,9,3},{5,7,2,2},{1,3,1,11},{13,2,0,1},{4,2,8,2},{2,3,1,10},{4,2,5,5}, + {7,0,7,2},{10,0,0,6},{0,8,5,3},{4,4,0,8},{12,4,0,0},{0,1,14,1},{8,0,1,7},{5,1,5,5},{11,0,3,2},{0,4,1,11},{0,8,8,0},{0,2,5,9},{7,3,2,4},{7,8,0,1},{1,0,3,12},{7,4,5,0},{1,6,7,2},{7,6,1,2},{9,6,1,0},{12,2,0,2},{4,1,6,5},{4,0,1,11},{8,4,4,0},{13,0,1,2},{8,6,2,0},{4,12,0,0},{2,7,5,2},{2,0,5,9},{5,4,5,2},{3,8,5,0},{7,3,3,3},{4,4,8,0}, + {2,1,3,10},{5,0,1,10},{6,4,3,3},{4,9,1,2},{1,4,0,11},{11,3,1,1},{4,0,12,0},{13,0,0,3},{6,1,6,3},{9,0,4,3},{8,0,0,8},{8,4,0,4},{0,12,1,3},{0,4,10,2},{3,4,8,1},{1,3,8,4},{9,2,5,0},{5,7,4,0},{1,0,11,4},{4,10,0,2},{1,3,12,0},{6,9,0,1},{5,0,9,2},{5,9,2,0},{13,1,0,2},{9,3,4,0},{9,4,0,3},{3,1,12,0},{2,4,3,7},{1,2,13,0},{2,2,4,8},{6,8,0,2}, + {9,2,1,4},{9,5,1,1},{2,0,4,10},{5,4,0,7},{0,0,6,10},{1,2,0,13},{4,7,2,3},{6,5,5,0},{3,3,1,9},{1,6,1,8},{12,2,1,1},{4,4,5,3},{1,0,6,9},{0,6,10,0},{4,8,3,1},{4,3,2,7},{2,1,7,6},{1,9,1,5},{3,1,3,9},{8,7,1,0},{1,2,3,10},{14,1,1,0},{5,4,4,3},{3,7,0,6},{7,4,1,4},{3,7,5,1},{1,1,0,14},{0,10,3,3},{0,4,3,9},{1,7,7,1},{2,0,10,4},{5,8,0,3}, + {6,7,3,0},{0,8,4,4},{5,7,3,1},{7,9,0,0},{7,6,2,1},{0,4,5,7},{6,3,5,2},{1,2,1,12},{5,2,0,9},{8,5,0,3},{4,6,1,5},{1,1,7,7},{10,5,1,0},{1,2,8,5},{1,8,2,5},{5,1,0,10},{6,9,1,0},{13,0,2,1},{8,3,5,0},{6,3,6,1},{2,11,3,0},{3,7,3,3},{1,5,2,8},{7,5,2,2},{0,6,7,3},{13,1,1,1},{5,3,4,4},{7,2,7,0},{5,8,3,0},{3,13,0,0},{0,7,9,0},{8,0,3,5}, + 
{1,3,7,5},{4,0,2,10},{12,0,1,3},{1,7,6,2},{3,9,0,4},{7,2,0,7},{0,1,7,8},{2,1,8,5},{0,13,1,2},{0,8,1,7},{5,0,11,0},{5,6,2,3},{0,3,0,13},{2,3,4,7},{5,6,3,2},{4,2,10,0},{3,3,7,3},{7,2,5,2},{1,1,11,3},{12,3,0,1},{5,1,1,9},{1,15,0,0},{9,7,0,0},{9,1,2,4},{0,7,3,6},{3,0,13,0},{3,0,11,2},{0,6,5,5},{8,2,2,4},{6,10,0,0},{4,8,4,0},{0,0,3,13}, + {0,4,12,0},{7,1,6,2},{3,5,0,8},{8,0,6,2},{6,2,3,5},{2,10,0,4},{4,11,0,1},{6,1,5,4},{5,1,3,7},{0,11,3,2},{4,6,0,6},{2,6,0,8},{3,1,7,5},{2,14,0,0},{2,9,2,3},{0,3,4,9},{11,0,1,4},{13,0,3,0},{8,3,0,5},{0,5,3,8},{5,11,0,0},{0,1,4,11},{2,1,9,4},{3,4,4,5},{7,1,2,6},{12,2,2,0},{9,4,1,2},{6,0,2,8},{4,6,2,4},{11,2,3,0},{3,2,2,9},{10,3,1,2}, + {1,1,2,12},{0,5,2,9},{0,1,11,4},{6,2,4,4},{2,8,2,4},{0,9,4,3},{11,0,2,3},{0,2,11,3},{6,0,7,3},{0,3,6,7},{4,5,5,2},{1,2,6,7},{7,5,1,3},{9,0,2,5},{2,6,4,4},{4,1,9,2},{4,8,2,2},{1,12,3,0},{0,9,6,1},{0,10,6,0},{3,1,5,7},{2,13,0,1},{2,2,1,11},{3,6,0,7},{5,6,5,0},{5,5,4,2},{4,0,3,9},{3,4,1,8},{0,11,2,3},{2,12,1,1},{7,1,3,5},{7,0,9,0}, + {8,0,8,0},{1,0,2,13},{3,3,10,0},{2,4,4,6},{2,3,8,3},{1,10,5,0},{7,3,0,6},{2,9,0,5},{1,4,6,5},{6,6,3,1},{5,6,0,5},{6,3,0,7},{3,10,2,1},{2,5,5,4},{3,8,4,1},{1,14,0,1},{10,3,3,0},{3,5,7,1},{1,1,3,11},{2,4,0,10},{9,3,1,3},{5,10,1,0},{3,0,6,7},{3,1,9,3},{11,2,1,2},{5,3,3,5},{0,5,1,10},{4,1,11,0},{10,2,0,4},{7,6,0,3},{2,7,0,7},{4,2,2,8}, + {6,1,7,2},{4,9,2,1},{0,0,8,8},{3,7,2,4},{9,6,0,1},{0,12,4,0},{6,7,1,2},{0,7,2,7},{1,0,10,5},{0,0,14,2},{2,7,3,4},{5,0,0,11},{7,7,1,1},{6,2,7,1},{4,5,3,4},{3,5,1,7},{5,9,1,1},{6,2,1,7},{3,2,0,11},{0,11,0,5},{3,11,2,0},{10,1,4,1},{7,0,4,5},{11,4,0,1},{10,3,0,3},{0,2,4,10},{0,15,0,1},{0,11,5,0},{6,7,2,1},{1,12,2,1},{4,1,3,8},{1,0,13,2}, + 
{1,8,5,2},{7,0,1,8},{3,12,1,0},{9,2,4,1},{1,7,4,4},{11,4,1,0},{4,3,8,1},{2,8,4,2},{1,11,3,1},{1,1,4,10},{4,10,2,0},{8,2,5,1},{1,0,9,6},{5,3,2,6},{0,9,7,0},{10,2,2,2},{5,8,1,2},{8,7,0,1},{0,3,12,1},{1,0,1,14},{4,8,0,4},{3,8,0,5},{4,6,5,1},{0,9,5,2},{10,2,3,1},{2,3,9,2},{1,0,12,3},{11,3,0,2},{4,5,2,5},{0,2,12,2},{9,1,0,6},{9,2,0,5}, + {1,2,7,6},{4,7,4,1},{0,12,2,2},{0,0,0,16},{2,8,3,3},{3,6,2,5},{0,6,3,7},{7,5,4,0},{3,3,3,7},{3,3,0,10},{5,0,6,5},{0,0,10,6},{8,5,3,0},{8,1,5,2},{6,0,9,1},{11,1,2,2},{2,11,2,1},{9,5,2,0},{3,0,4,9},{2,2,12,0},{2,6,6,2},{2,1,13,0},{6,0,5,5},{2,0,14,0},{2,11,1,2},{4,4,7,1},{2,0,11,3},{3,1,1,11},{2,9,4,1},{3,7,6,0},{14,0,2,0},{1,10,4,1}, + {8,0,7,1},{3,6,5,2},{0,3,11,2},{2,5,6,3},{11,1,3,1},{6,5,3,2},{3,8,1,4},{0,2,7,7},{2,10,2,2},{1,6,2,7},{11,0,0,5},{12,1,1,2},{12,1,2,1},{0,7,1,8},{0,3,9,4},{0,2,1,13},{7,1,4,4},{10,1,0,5},{4,0,8,4},{5,2,7,2},{0,2,0,14},{4,3,7,2},{2,7,1,6},{1,2,2,11},{6,3,3,4},{1,14,1,0},{2,4,6,4},{5,3,6,2},{5,3,5,3},{8,4,1,3},{1,3,0,12},{3,5,2,6}, + {1,8,7,0},{0,7,4,5},{2,1,6,7},{4,11,1,0},{7,2,4,3},{6,1,3,6},{4,5,4,3},{2,11,0,3},{1,5,7,3},{12,0,2,2},{5,0,4,7},{1,13,0,2},{7,7,2,0},{4,1,7,4},{4,5,0,7},{5,0,5,6},{6,5,4,1},{2,4,2,8},{1,10,1,4},{6,3,1,6},{3,3,8,2},{0,7,7,2},{4,4,2,6},{1,1,8,6},{1,12,0,3},{2,1,12,1},{1,9,2,4},{1,11,0,4},{2,5,2,7},{10,0,3,3},{4,6,3,3},{3,7,1,5}, + {1,9,0,6},{7,1,7,1},{1,6,5,4},{9,2,3,2},{6,2,2,6},{2,2,2,10},{8,3,3,2},{0,1,8,7},{2,0,8,6},{0,3,1,12},{9,4,2,1},{9,4,3,0},{6,2,6,2},{1,8,0,7},{5,1,10,0},{0,5,5,6},{8,2,4,2},{2,3,2,9},{6,0,3,7},{2,2,6,6},{2,6,2,6},{1,13,2,0},{9,3,0,4},{7,3,5,1},{6,5,2,3},{5,2,6,3},{2,0,12,2},{5,7,1,3},{8,1,3,4},{3,1,10,2},{1,0,15,0},{0,8,0,8}, + {5,0,7,4},{4,4,6,2},{0,1,0,15},{10,0,1,5},{7,3,4,2},{4,9,3,0},{2,5,7,2},{3,4,2,7},{8,3,2,3},{5,1,6,4},{0,10,2,4},{6,6,1,3},{6,0,0,10},{4,4,3,5},{1,3,9,3},{7,5,3,1},{3,0,7,6},{1,8,6,1},{4,3,0,9},{3,11,0,2},{6,0,6,4},{0,1,3,12},{0,4,2,10},{5,5,6,0},{4,1,4,7},{8,1,6,1},{5,6,4,1},{8,4,2,2},{4,3,1,8},{3,0,2,11},{1,11,4,0},{0,8,3,5}, 
+ {5,1,7,3},{7,0,8,1},{4,3,5,4},{4,6,4,2},{3,2,4,7},{1,6,3,6},{0,7,8,1},{3,0,1,12},{9,1,4,2},{7,4,0,5},{1,7,0,8},{5,4,1,6},{9,1,5,1},{1,1,9,5},{4,1,1,10},{5,3,0,8},{2,2,5,7},{4,0,0,12},{9,0,7,0},{3,4,0,9},{0,2,6,8},{8,2,0,6},{3,2,6,5},{4,2,6,4},{3,6,4,3},{2,8,6,0},{5,0,3,8},{0,4,0,12},{0,16,0,0},{0,9,2,5},{4,0,11,1},{1,6,4,5}, + {0,1,6,9},{3,4,6,3},{3,0,10,3},{7,0,6,3},{1,4,9,2},{1,5,3,7},{8,5,2,1},{0,12,0,4},{7,2,3,4},{0,5,6,5},{11,1,1,3},{6,5,0,5},{2,1,5,8},{1,4,11,0},{9,1,1,5},{0,0,13,3},{5,8,2,1},{2,12,0,2},{3,3,6,4},{4,1,10,1},{4,0,5,7},{8,1,0,7},{5,1,9,1},{4,3,3,6},{0,2,2,12},{6,3,2,5},{0,0,12,4},{1,5,1,9},{2,6,5,3},{3,6,3,4},{2,12,2,0},{1,6,8,1}, + {10,1,1,4},{1,3,4,8},{7,4,4,1},{1,11,1,3},{1,2,10,3},{3,9,3,1},{8,5,1,2},{2,10,4,0},{4,2,0,10},{2,7,6,1},{8,2,3,3},{1,5,5,5},{3,1,0,12},{3,10,3,0},{8,0,5,3},{0,6,8,2},{0,3,13,0},{0,0,16,0},{1,9,4,2},{4,1,8,3},{1,6,6,3},{0,10,5,1},{0,1,12,3},{4,0,6,6},{3,8,3,2},{0,5,4,7},{1,0,14,1},{0,4,6,6},{3,9,1,3},{3,5,8,0},{3,6,6,1},{5,4,7,0}, + {3,0,12,1},{8,6,1,1},{2,9,5,0},{6,1,1,8},{4,1,2,9},{3,9,4,0},{5,2,9,0},{0,12,3,1},{1,4,10,1},{4,0,7,5},{3,1,2,10},{5,4,2,5},{5,5,5,1},{4,2,3,7},{1,7,5,3},{2,8,0,6},{8,1,2,5},{3,8,2,3},{6,1,2,7},{3,9,2,2},{9,0,0,7},{0,8,6,2},{8,4,3,1},{0,2,8,6},{6,5,1,4},{2,3,5,6},{2,10,3,1},{0,7,0,9},{4,2,7,3},{2,4,8,2},{7,1,1,7},{2,4,7,3}, + {2,4,10,0},{0,1,10,5},{4,7,1,4},{0,10,4,2},{9,0,1,6},{1,9,6,0},{3,3,4,6},{4,5,7,0},{5,5,2,4},{2,8,1,5},{2,3,6,5},{0,1,1,14},{3,2,3,8},{10,1,2,3},{9,1,6,0},{3,4,3,6},{2,2,0,12},{0,0,9,7},{4,0,9,3},{7,0,5,4},{4,5,6,1},{2,5,1,8},{2,5,9,0},{3,5,4,4},{1,3,11,1},{7,1,5,3},{3,2,7,4},{1,4,2,9},{1,11,2,2},{2,2,3,9},{5,0,10,1},{3,2,11,0}, + {1,10,3,2},{8,3,4,1},{3,6,7,0},{0,7,5,4},{1,3,3,9},{2,2,10,2},{1,9,5,1},{0,5,0,11},{3,0,3,10},{0,4,8,4},{2,7,7,0},{2,0,2,12},{1,2,11,2},{6,3,7,0},{0,6,2,8},{0,10,1,5},{0,9,0,7},{6,4,4,2},{6,0,1,9},{1,5,10,0},{5,4,6,1},{5,5,3,3},{0,0,4,12},{0,3,2,11},{1,4,1,10},{3,0,9,4},{5,5,0,6},{1,7,8,0},{2,0,3,11},{6,4,1,5},{10,0,6,0},{0,6,0,10}, + 
{0,4,11,1},{3,1,6,6},{2,5,8,1},{0,2,10,4},{3,1,11,1},{6,6,2,2},{1,1,10,4},{2,1,2,11},{6,1,8,1},{0,2,13,1},{0,7,6,3},{6,8,2,0},{3,0,0,13},{4,4,4,4},{6,2,0,8},{7,3,1,5},{0,11,4,1},{6,7,0,3},{2,6,3,5},{5,2,1,8},{7,1,8,0},{5,5,1,5},{1,8,3,4},{8,2,6,0},{6,0,10,0},{5,6,1,4},{1,4,4,7},{2,7,4,3},{1,4,8,3},{5,4,3,4},{1,10,2,3},{2,9,1,4}, + {2,2,11,1},{2,5,0,9},{0,0,1,15},{0,0,11,5},{0,4,7,5},{0,1,15,0},{2,1,0,13},{0,3,10,3},{8,0,2,6},{3,3,2,8},{3,5,5,3},{1,7,1,7},{1,3,2,10},{4,0,4,8},{2,0,9,5},{1,1,1,13},{2,2,7,5},{2,1,10,3},{4,2,1,9},{4,3,6,3},{1,3,5,7},{2,5,3,6},{1,0,8,7},{5,0,2,9},{2,8,5,1},{1,6,0,9},{0,0,5,11},{0,4,9,3},{2,0,7,7},{1,7,2,6},{2,1,1,12},{2,4,9,1}, + {0,5,7,4},{6,0,4,6},{3,2,10,1},{0,6,1,9},{2,6,1,7},{0,5,8,3},{4,1,0,11},{1,2,4,9},{4,1,5,6},{6,1,0,9},{1,4,3,8},{4,5,1,6},{1,0,5,10},{5,3,1,7},{0,9,1,6},{2,0,1,13},{2,0,6,8},{8,1,1,6},{1,5,9,1},{0,6,9,1},{0,3,5,8},{0,2,9,5},{5,2,8,1},{1,1,14,0},{3,2,9,2},{5,0,8,3},{0,5,10,1},{5,2,3,6},{2,6,7,1},{2,3,0,11},{0,1,9,6},{1,0,4,11}, + {3,0,5,8},{0,0,15,1},{2,4,5,5},{0,3,7,6},{2,0,0,14},{1,1,12,2},{2,6,8,0},{3,1,8,4},{0,1,5,10} + }; + + // All total orderings for 16 pixels [0,2] 2-bit selectors. + // BC1 selector order: 0, 1, 2 + // Note this is different from g_unique_total_orders4[], which reorders the selectors into linear order. 
+ const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS3 = 153; + static uint8_t g_unique_total_orders3[NUM_UNIQUE_TOTAL_ORDERINGS3][3] = + { + {6,0,10},{3,6,7},{3,0,13},{13,3,0},{12,4,0},{9,1,6},{2,13,1},{4,7,5},{7,5,4},{9,6,1},{7,4,5},{8,6,2},{16,0,0},{10,6,0},{2,7,7}, + {0,0,16},{0,3,13},{1,15,0},{0,2,14},{1,4,11},{15,1,0},{1,12,3},{9,2,5},{14,1,1},{8,2,6},{3,3,10},{4,2,10},{14,0,2},{0,14,2},{1,7,8},{6,6,4}, + {11,5,0},{6,4,6},{11,3,2},{4,3,9},{7,1,8},{10,4,2},{12,1,3},{11,0,5},{9,3,4},{1,0,15},{9,0,7},{2,6,8},{12,2,2},{6,2,8},{6,8,2},{15,0,1}, + {4,8,4},{0,4,12},{8,5,3},{5,9,2},{11,2,3},{12,3,1},{6,3,7},{1,1,14},{2,9,5},{1,8,7},{4,10,2},{7,7,2},{13,1,2},{0,15,1},{3,2,11},{7,0,9}, + {4,4,8},{3,8,5},{0,5,11},{13,2,1},{1,10,5},{4,11,1},{3,10,3},{5,10,1},{10,2,4},{0,6,10},{14,2,0},{11,4,1},{3,12,1},{1,13,2},{1,5,10},{5,11,0}, + {12,0,4},{8,1,7},{6,10,0},{3,13,0},{7,2,7},{0,7,9},{5,8,3},{0,12,4},{11,1,4},{13,0,3},{0,16,0},{5,7,4},{10,3,3},{10,0,6},{0,13,3},{4,6,6}, + {2,8,6},{2,5,9},{7,8,1},{2,1,13},{2,0,14},{7,3,6},{5,1,10},{3,11,2},{5,4,7},{8,3,5},{10,5,1},{6,9,1},{1,3,12},{4,5,7},{2,2,12},{4,1,11}, + {0,8,8},{4,12,0},{6,5,5},{8,7,1},{5,5,6},{3,7,6},{7,9,0},{4,9,3},{0,10,6},{8,0,8},{5,3,8},{10,1,5},{6,1,9},{7,6,3},{9,5,2},{0,1,15}, + {9,7,0},{2,14,0},{3,4,9},{8,4,4},{9,4,3},{0,9,7},{1,9,6},{3,9,4},{5,2,9},{2,3,11},{5,6,5},{1,14,1},{6,7,3},{2,4,10},{2,12,2},{8,8,0}, + {2,10,4},{4,0,12},{0,11,5},{2,11,3},{1,11,4},{3,5,8},{5,0,11},{3,1,12},{1,2,13},{1,6,9} + }; + + // For each total ordering, this table indicates which other total orderings are likely to improve quality using a least squares pass. Each array is sorted by usefulness. 
+ static uint16_t g_best_total_orderings4[NUM_UNIQUE_TOTAL_ORDERINGS4][MAX_TOTAL_ORDERINGS4] = + { +#if RGBCX_USE_SMALLER_TABLES + #include "rgbcx_table4_small.h" +#else + #include "rgbcx_table4.h" +#endif + }; + + static uint8_t g_best_total_orderings3[NUM_UNIQUE_TOTAL_ORDERINGS3][32] = + { + { 12,1,3,5,27,2,4,38,8,7,16,18,6,10,41,79,40,23,46,9,20,88,22,37,14,19,24,126,99,119,35,11 }, + { 7,64,116,14,94,30,8,42,1,108,47,55,137,10,134,95,96,115,69,32,63,29,90,113,11,148,16,103,19,9,34,25 }, + { 12,1,0,5,3,7,4,27,8,6,38,40,41,16,18,46,9,10,20,23,79,62,14,22,88,99,37,126,92,19,120,11 }, + { 16,88,27,18,46,48,126,107,79,19,59,38,37,65,23,66,0,2,3,43,12,151,28,25,5,87,72,40,1,20,52,92 }, + { 79,48,88,16,27,65,18,38,46,19,37,4,72,33,126,41,52,0,12,92,5,1,2,107,3,77,23,91,43,51,22,74 }, + { 1,8,41,122,10,22,2,0,87,24,37,120,38,7,39,4,5,3,9,92,62,59,23,16,104,11,27,79,19,26,25,32 }, + { 2,76,99,28,40,86,93,21,138,60,6,0,17,128,145,119,98,144,141,82,147,54,67,75,5,12,27,132,146,1,38,14 }, + { 47,7,64,90,1,118,116,85,57,14,30,94,50,45,137,134,8,42,69,139,55,68,58,108,95,29,10,115,0,32,2,11 }, + { 49,8,10,30,124,11,32,113,130,58,125,9,100,53,104,115,131,103,24,7,1,39,45,36,139,0,137,22,90,44,114,105 }, + { 9,38,72,125,49,41,84,11,13,5,27,0,16,92,8,2,65,105,10,18,48,29,127,131,36,14,1,46,111,79,130,12 }, + { 130,8,10,100,104,131,49,32,53,39,30,36,113,24,11,22,124,44,83,58,7,103,1,4,9,125,5,0,91,33,115,74 }, + { 114,11,58,8,120,49,9,124,142,111,41,30,10,0,97,130,62,84,38,5,72,125,92,127,100,27,139,113,13,132,32,1 }, + { 60,46,28,27,40,20,0,17,18,2,126,16,6,38,86,23,79,54,1,93,5,88,41,14,21,111,7,48,3,84,72,62 }, + { 72,92,38,65,84,48,41,79,27,16,29,111,88,5,18,46,1,0,152,14,37,19,77,42,132,7,22,13,119,56,12,2 }, + { 7,55,1,95,29,56,64,116,143,8,14,30,47,94,152,90,65,67,10,133,42,72,146,84,16,48,6,0,25,108,77,21 }, + { 27,23,20,5,0,79,38,2,3,1,59,46,4,41,33,86,37,87,88,92,7,126,43,8,22,152,151,150,149,148,147,146 }, + { 
12,0,1,2,7,6,3,5,28,4,8,14,60,40,17,19,21,86,126,93,10,18,9,29,48,99,65,25,84,119,72,41 }, + { 60,40,99,2,54,12,0,1,19,28,98,93,6,138,21,5,27,17,151,14,76,46,16,18,38,29,86,144,107,7,25,41 }, + { 12,0,1,2,3,5,6,7,4,28,8,60,14,40,16,17,21,10,19,9,86,38,126,41,93,27,29,48,62,84,79,99 }, + { 0,1,2,10,5,8,3,25,4,29,32,34,63,7,77,26,16,48,65,56,14,22,129,103,72,24,18,152,140,53,96,42 }, + { 46,126,18,54,12,16,1,0,5,2,27,98,20,23,6,3,88,48,28,7,19,8,4,60,151,38,37,21,79,14,65,40 }, + { 76,6,141,86,119,2,138,67,28,145,0,93,17,1,40,60,146,99,147,14,21,144,132,7,5,29,55,27,16,75,19,12 }, + { 71,5,51,39,22,80,0,43,10,122,8,62,41,24,104,87,35,37,2,91,33,120,36,38,1,131,9,100,130,66,3,4 }, + { 126,18,46,27,20,16,88,23,12,79,54,59,48,0,73,1,37,151,5,19,28,38,2,66,60,3,65,98,14,26,6,43 }, + { 22,10,8,5,0,71,35,80,104,39,24,51,100,1,62,32,2,130,11,41,7,9,53,43,49,83,122,120,30,44,37,38 }, + { 1,34,14,129,53,63,42,26,121,148,7,44,96,10,0,24,100,32,64,116,140,22,5,19,29,103,135,108,8,61,39,83 }, + { 1,7,34,63,44,25,135,14,24,108,22,0,83,94,5,129,35,101,47,121,2,19,42,53,6,110,103,8,148,10,16,123 }, + { 12,28,16,60,18,1,6,21,14,0,86,19,2,48,93,17,38,29,7,5,65,126,46,72,41,79,84,119,40,56,54,88 }, + { 0,2,12,27,5,46,38,40,41,79,88,99,3,23,1,62,20,4,22,37,92,35,18,8,16,24,10,60,7,120,98,54 }, + { 1,7,14,56,8,0,84,67,10,2,133,72,42,111,5,30,21,4,9,3,25,94,16,116,47,11,65,18,132,90,55,64 }, + { 30,8,124,139,45,11,58,90,113,137,7,115,10,32,1,49,94,85,9,47,108,103,0,97,63,14,50,114,53,106,100,25 }, + { 65,38,48,27,16,79,72,18,88,19,46,77,84,92,37,41,0,29,1,14,12,111,2,5,31,36,87,74,105,40,28,51 }, + { 10,8,30,113,130,100,53,32,115,103,104,7,1,121,39,49,131,44,24,36,63,137,34,45,22,90,108,83,26,11,94,139 }, + { 51,52,43,33,5,74,16,37,71,91,38,3,36,87,48,22,4,0,122,41,39,18,66,27,79,24,65,88,59,23,62,92 }, + { 1,7,63,53,108,121,94,44,103,100,14,10,129,47,32,26,24,25,148,42,135,22,0,61,83,8,39,104,5,64,115,34 }, + { 
1,8,10,7,5,0,80,32,62,2,24,44,53,83,9,41,30,22,100,11,14,25,120,4,26,6,3,16,122,34,19,35 }, + { 74,4,36,48,33,91,39,79,22,16,65,5,131,38,24,71,27,52,0,105,51,18,88,104,3,31,10,37,72,19,41,130 }, + { 59,43,38,79,23,27,92,51,0,16,46,5,18,88,41,37,66,3,87,20,48,2,122,4,22,12,1,126,19,65,33,24 }, + { 12,28,1,27,0,16,2,46,65,60,21,3,5,18,6,19,48,14,4,7,79,88,86,29,22,72,93,40,23,8,17,41 }, + { 22,91,39,33,24,71,5,131,36,10,51,0,130,8,104,2,35,125,9,43,52,49,83,80,100,41,122,3,37,38,4,16 }, + { 12,0,1,2,5,3,4,8,7,27,18,38,10,6,16,46,9,20,41,23,126,79,22,14,19,99,88,54,37,48,62,35 }, + { 12,27,1,2,3,0,46,4,38,16,8,28,7,79,18,5,84,6,88,10,14,21,23,20,40,22,60,19,9,29,72,65 }, + { 1,14,7,55,95,29,8,94,30,56,10,108,77,116,152,64,32,48,63,42,143,148,16,25,137,65,11,0,115,9,19,72 }, + { 37,79,66,38,16,52,48,59,43,27,87,33,41,4,23,51,3,5,88,18,92,46,73,122,22,71,20,0,65,19,2,120 }, + { 24,32,83,22,53,1,8,10,7,30,35,5,103,0,100,101,121,113,34,123,63,2,44,25,71,115,80,14,26,108,51,39 }, + { 97,45,111,58,85,139,0,90,47,7,120,106,142,30,50,132,41,62,84,1,119,114,14,56,117,8,38,29,2,64,116,5 }, + { 12,28,16,18,1,60,6,14,2,21,0,86,126,19,48,93,7,27,17,29,5,65,54,38,72,79,84,88,119,145,8,111 }, + { 118,47,64,116,57,85,7,14,50,1,42,0,45,68,86,69,2,111,134,28,90,55,16,29,56,48,84,144,60,30,112,41 }, + { 12,1,2,0,7,6,28,5,3,4,8,14,60,21,18,40,17,86,10,9,16,29,19,93,126,79,38,84,72,27,111,119 }, + { 11,8,49,130,10,125,9,124,100,114,131,30,58,104,32,39,24,113,36,105,0,41,22,120,5,53,111,38,142,44,83,35 }, + { 50,70,47,118,85,57,106,0,45,7,64,90,81,14,2,134,28,62,86,55,69,1,78,119,68,56,18,67,16,60,29,21 }, + { 43,37,33,87,51,41,66,5,122,38,22,59,92,0,23,91,27,16,71,79,18,52,120,4,3,24,46,20,73,39,62,36 }, + { 79,48,4,16,27,88,43,33,18,38,65,37,46,3,19,51,52,22,66,87,74,5,41,91,23,59,0,71,122,72,20,92 }, + { 32,100,10,8,30,104,24,44,39,113,83,103,1,7,22,53,115,63,135,121,26,35,34,5,0,108,137,90,91,45,2,130 }, + { 
0,1,2,5,16,12,6,7,14,3,19,18,29,20,4,21,40,8,17,35,23,48,126,22,25,56,26,10,98,27,38,65 }, + { 143,67,56,146,1,7,133,55,64,141,134,69,6,47,14,29,84,21,111,147,57,16,95,72,118,132,50,0,2,18,119,42 }, + { 1,7,67,14,133,111,8,84,0,21,2,47,64,132,55,10,95,147,119,42,16,5,72,56,4,3,6,29,9,25,18,30 }, + { 68,57,69,112,144,86,102,2,134,55,0,70,118,64,75,47,14,28,93,143,67,7,50,149,1,21,29,56,119,95,60,78 }, + { 58,97,114,30,124,45,11,139,8,90,0,142,7,10,41,113,84,62,49,111,85,1,9,5,137,120,32,14,2,117,47,38 }, + { 23,66,18,79,38,20,43,27,16,88,46,59,126,37,87,12,73,92,3,5,48,0,19,54,2,51,28,1,41,65,122,22 }, + { 0,12,2,27,5,40,46,38,1,41,3,79,88,23,99,4,20,62,22,54,92,18,8,37,16,35,10,7,19,120,144,24 }, + { 1,14,25,26,0,7,44,34,129,42,24,5,135,22,19,148,6,96,83,2,29,16,63,35,101,64,140,136,116,110,3,10 }, + { 12,1,2,27,3,4,38,5,7,8,18,16,46,6,0,40,41,10,79,23,88,9,20,22,14,19,37,92,48,126,28,21 }, + { 7,1,10,32,108,103,94,47,8,53,25,14,34,115,100,129,121,130,148,42,64,116,63,26,44,0,24,30,113,4,104,22 }, + { 47,134,7,14,55,69,64,95,1,29,85,118,56,116,45,57,102,143,50,90,42,30,16,94,0,8,67,75,133,2,18,48 }, + { 12,1,2,0,7,6,28,8,14,5,3,4,40,21,17,18,60,86,16,93,126,10,9,29,99,38,119,25,19,54,27,84 }, + { 59,16,27,18,23,88,79,37,46,66,38,20,73,126,3,43,48,87,92,51,41,12,19,5,52,107,65,0,151,122,54,2 }, + { 1,21,147,7,119,14,76,132,55,0,86,145,2,6,69,67,16,143,111,138,17,28,29,60,18,93,8,19,40,56,84,5 }, + { 144,86,112,2,68,102,69,0,149,93,75,28,57,55,145,60,21,67,99,134,143,40,146,119,82,110,62,6,29,26,78,14 }, + { 102,57,55,69,143,75,146,67,56,68,134,2,29,141,0,21,6,14,133,118,64,1,7,95,47,84,111,28,147,82,72,119 }, + { 0,70,57,119,50,145,2,86,28,118,69,78,149,47,60,68,67,55,93,81,134,21,14,62,64,7,5,1,132,85,41,16 }, + { 51,5,43,71,122,87,41,37,91,39,0,22,33,36,38,24,66,120,62,2,80,16,92,10,59,4,27,23,35,79,8,3 }, + { 12,1,2,0,7,6,28,5,8,14,3,21,40,4,60,17,86,18,16,93,10,9,126,119,99,29,19,41,38,27,25,92 }, + { 
27,18,46,126,23,16,88,79,20,151,59,73,48,38,0,54,12,2,37,1,19,5,28,60,66,41,3,109,86,65,40,6 }, + { 48,79,4,33,16,74,65,38,88,27,91,52,18,36,22,19,46,0,37,3,51,5,71,39,72,43,24,41,92,87,2,10 }, + { 86,2,144,93,28,112,141,6,102,21,99,60,75,0,68,82,69,146,67,149,55,40,145,76,111,147,56,119,110,143,26,132 }, + { 6,138,2,99,86,17,40,93,28,21,145,141,0,60,119,147,128,76,67,54,1,12,5,27,144,14,38,98,146,41,29,19 }, + { 1,8,0,10,2,29,7,5,3,56,4,25,14,152,63,32,65,72,96,42,34,108,48,9,26,16,84,103,67,148,22,129 }, + { 149,145,0,86,2,28,93,144,62,60,119,101,21,41,5,35,78,99,26,40,12,68,57,67,110,120,69,18,55,76,132,70 }, + { 12,28,16,1,48,19,6,60,2,14,18,21,0,27,46,65,86,29,5,7,72,93,40,3,17,84,56,88,126,4,38,8 }, + { 1,8,5,10,7,24,2,62,0,41,22,122,120,9,4,3,32,87,11,37,38,83,100,44,25,104,16,26,39,80,14,6 }, + { 0,119,62,86,145,149,28,132,93,2,120,67,60,41,35,5,144,21,123,38,111,81,84,56,12,44,24,50,92,55,40,22 }, + { 2,93,99,28,40,144,60,0,86,150,76,21,149,98,6,25,1,61,82,26,12,5,54,141,7,18,145,16,27,138,110,38 }, + { 24,8,10,22,32,35,100,5,1,53,0,7,71,80,30,123,83,104,51,11,2,39,44,113,9,62,25,103,34,101,43,41 }, + { 12,1,2,0,7,6,28,5,40,60,8,16,3,18,14,4,86,21,17,93,41,10,9,99,27,119,38,19,126,22,48,145 }, + { 45,47,50,7,85,90,97,1,64,139,116,118,30,58,14,106,70,111,0,57,94,42,137,142,29,120,8,56,18,134,84,41 }, + { 12,0,2,5,27,38,1,46,41,40,79,144,3,22,88,23,28,60,99,62,6,24,26,7,4,16,10,35,37,18,14,20 }, + { 37,38,59,92,0,5,23,51,79,41,27,22,2,3,87,16,46,4,1,43,20,33,18,88,24,71,8,10,48,19,126,122 }, + { 12,28,16,60,1,18,6,21,19,14,48,0,2,86,93,5,46,29,17,27,65,7,3,72,38,126,119,40,84,37,56,4 }, + { 0,2,5,1,16,6,27,28,18,38,60,7,14,21,46,40,86,41,19,48,93,8,3,79,22,4,10,37,62,23,24,111 }, + { 85,7,90,30,47,139,45,50,94,58,137,1,8,64,14,116,118,115,113,11,124,108,0,10,97,57,32,70,42,106,29,114 }, + { 33,36,22,71,51,5,91,39,0,52,43,24,131,74,16,37,38,122,41,3,87,48,4,104,35,80,10,2,105,62,27,18 }, + { 
12,1,27,2,0,16,3,28,46,18,4,6,5,72,21,79,38,7,14,60,88,8,65,19,48,29,23,40,22,20,86,126 }, + { 0,12,2,27,5,38,46,41,1,40,79,3,88,23,22,99,20,37,62,4,18,6,16,35,60,28,24,7,92,8,14,10 }, + { 7,47,1,30,137,8,116,94,90,64,14,115,108,118,57,10,148,113,42,85,32,11,63,50,103,45,124,134,55,9,69,34 }, + { 55,7,1,29,56,143,64,47,67,133,14,146,95,72,84,8,116,111,6,134,141,21,65,0,69,30,16,45,85,42,50,10 }, + { 14,1,42,8,10,29,108,63,55,148,95,32,7,19,25,115,103,34,56,129,77,0,16,152,94,30,113,26,2,5,48,4 }, + { 111,120,142,97,58,0,41,45,62,132,114,84,139,30,5,8,38,2,7,85,119,90,117,1,124,11,56,47,28,27,35,72 }, + { 1,0,14,2,6,5,16,19,7,29,42,18,3,25,12,35,21,8,26,17,40,4,20,48,109,99,22,96,55,101,10,61 }, + { 12,0,1,5,3,2,4,7,27,8,38,6,40,18,16,10,20,46,9,41,23,22,79,14,62,19,37,126,88,11,92,48 }, + { 10,8,104,39,24,32,22,83,44,100,30,130,53,91,113,5,11,1,35,33,7,49,0,2,103,71,36,124,9,80,131,34 }, + { 1,7,0,14,8,34,5,25,35,26,6,63,10,123,2,16,103,19,44,32,135,121,108,80,62,30,115,94,149,144,53,18 }, + { 75,68,146,141,102,67,2,21,6,57,69,143,0,55,82,86,28,144,147,29,93,112,56,119,133,14,76,60,84,134,111,145 }, + { 10,32,115,7,8,53,1,108,30,113,94,137,100,63,90,34,130,103,121,47,44,25,104,39,24,26,85,14,49,36,22,131 }, + { 39,24,10,22,8,130,91,104,83,49,5,33,100,11,0,35,32,131,71,36,9,44,53,2,80,51,30,1,41,7,43,62 }, + { 38,36,65,105,27,72,31,79,41,131,5,48,125,39,0,16,92,46,22,13,18,84,24,37,88,2,33,74,91,71,130,49 }, + { 0,106,62,50,45,119,85,81,132,28,2,86,41,47,38,60,35,117,5,29,7,30,145,90,55,70,14,111,18,67,93,56 }, + { 0,2,5,1,3,25,19,26,4,34,29,10,22,16,8,7,24,14,48,65,53,18,6,77,44,56,72,61,121,21,136,40 }, + { 7,1,94,8,47,115,10,32,113,103,30,108,137,63,14,64,116,148,129,42,90,25,34,118,53,57,11,49,85,9,96,50 }, + { 14,0,1,26,19,5,42,2,25,24,29,22,6,44,61,16,7,96,136,3,140,34,35,55,135,18,48,77,83,4,8,10 }, + { 1,7,14,0,25,6,34,5,26,16,63,2,19,8,35,101,108,29,94,10,18,42,123,144,129,47,61,21,3,62,149,4 }, + { 
12,0,2,1,28,5,6,120,7,60,40,16,18,86,27,14,21,93,8,62,41,38,3,17,4,119,99,48,19,126,10,9 }, + { 86,144,93,2,28,149,0,60,99,112,110,145,40,21,102,26,75,62,69,1,12,101,119,25,76,67,7,68,55,5,6,14 }, + { 8,30,10,32,113,49,115,137,124,103,45,90,7,139,11,1,58,53,130,94,108,100,9,63,85,125,34,47,0,24,44,104 }, + { 120,142,111,41,58,114,97,0,11,62,84,124,5,30,8,38,132,127,27,139,92,10,72,45,49,9,28,2,29,56,16,1 }, + { 8,113,30,137,7,32,10,90,94,115,1,103,108,63,47,85,49,53,11,45,34,50,14,25,9,124,100,130,139,121,42,26 }, + { 64,7,14,47,134,55,1,42,95,69,116,90,94,30,8,29,56,137,45,108,85,10,57,16,102,143,118,19,63,32,11,50 }, + { 62,132,0,119,120,41,111,86,35,28,5,84,56,38,2,93,145,60,67,12,92,27,29,72,55,117,21,24,133,149,22,45 }, + { 57,68,69,118,134,64,50,47,55,14,7,2,102,144,0,112,70,86,85,1,95,29,116,143,42,75,16,56,28,45,21,48 }, + { 0,12,2,1,5,28,6,40,60,27,7,38,16,14,86,18,93,41,62,46,99,35,8,23,3,17,22,21,10,19,79,20 }, + { 12,1,2,27,16,3,38,111,4,0,18,5,7,46,40,8,79,6,14,28,88,10,48,41,19,84,21,9,22,23,20,72 }, + { 53,103,32,7,1,100,22,63,71,44,10,115,108,24,92,104,26,30,122,94,8,39,83,34,137,135,90,91,121,5,87,47 }, + { 87,37,41,0,22,38,2,92,1,24,4,8,3,59,10,5,39,23,71,79,122,27,16,46,33,7,91,20,18,51,9,120 }, + { 1,7,8,10,0,5,35,32,53,44,14,30,2,80,25,34,6,62,26,103,16,19,63,9,149,24,121,41,22,11,113,83 }, + { 11,58,8,30,124,49,10,113,9,114,139,45,97,32,7,137,90,1,0,130,115,125,100,24,5,94,53,41,14,13,35,38 }, + { 125,105,9,36,131,49,8,130,39,11,10,5,22,38,41,104,0,31,13,24,27,16,2,72,65,91,48,32,84,18,100,74 }, + { 12,1,0,2,6,3,7,5,4,8,14,28,16,60,18,10,21,17,19,9,40,27,86,93,29,38,54,11,25,48,46,41 }, + { 84,41,38,72,92,29,111,5,65,120,79,0,27,56,48,14,132,16,119,22,86,88,46,28,62,12,1,2,93,18,24,127 }, + { 99,28,40,60,2,93,138,0,98,17,86,54,76,12,27,1,21,144,128,38,5,14,46,18,25,16,109,6,41,145,7,29 }, + { 1,63,10,32,148,14,103,34,42,7,8,108,116,53,64,96,25,121,26,94,140,0,29,19,55,24,100,136,5,4,44,115 }, + { 
131,100,130,49,10,8,36,104,39,0,48,41,11,38,4,24,27,22,16,44,79,5,33,2,53,9,125,74,91,120,32,83 }, + { 36,39,131,74,4,91,22,33,125,104,130,48,10,24,16,5,49,8,100,105,79,0,9,65,71,2,18,83,31,11,19,44 }, + { 0,12,2,1,6,5,7,28,40,60,16,14,18,62,86,27,93,8,17,38,21,41,35,99,3,19,10,23,22,4,9,48 }, + { 1,7,67,14,21,147,111,55,132,119,0,8,2,76,64,16,47,84,6,18,86,95,145,10,42,29,133,5,56,134,17,72 }, + { 69,55,47,134,102,143,7,57,118,95,14,64,29,56,1,50,75,67,146,2,0,133,68,16,21,6,141,85,116,18,72,65 }, + { 1,44,7,24,83,63,34,103,22,121,53,32,25,35,0,115,108,5,14,8,10,101,94,30,2,123,110,26,137,47,90,19 }, + { 14,1,25,42,34,0,26,96,19,29,140,5,53,10,2,121,3,24,44,22,55,77,129,7,63,16,8,4,6,61,100,48 }, + { 30,90,7,8,137,94,85,1,47,113,115,108,45,139,124,11,10,32,50,58,103,14,63,64,9,116,49,42,25,148,0,53 }, + { 40,99,2,60,28,17,0,54,93,98,86,138,6,12,21,76,1,5,27,144,128,38,19,46,14,41,145,7,16,67,3,109 }, + { 45,58,30,139,90,7,85,137,97,8,124,47,1,11,106,114,50,94,0,113,10,115,14,32,9,64,108,41,49,29,62,116 }, + { 14,42,10,1,63,96,32,25,34,8,129,29,0,103,55,19,26,53,77,5,95,2,4,7,3,16,148,56,18,24,121,108 }, + { 21,2,75,86,6,76,144,28,119,99,93,147,141,67,102,145,60,132,146,128,0,82,40,138,55,111,143,17,133,112,69,14 }, + { 111,120,41,62,84,132,0,5,38,119,56,92,72,142,27,28,29,35,58,80,2,86,65,79,12,14,1,24,145,16,21,48 }, + { 146,67,141,69,133,21,6,143,57,55,111,147,56,1,14,132,7,2,134,102,0,119,29,84,76,64,86,72,28,68,47,75 }, + { 12,1,0,5,27,3,7,4,38,8,6,41,16,40,46,10,18,79,2,9,23,86,20,22,62,14,37,88,92,19,24,11 }, + { 0,12,2,1,27,5,38,28,60,6,40,7,16,46,18,14,41,99,93,62,3,79,86,23,149,8,22,35,88,17,19,10 }, + { 141,6,21,67,147,102,146,2,76,119,132,69,55,111,86,75,28,133,143,0,1,145,14,128,56,99,17,60,29,93,84,68 }, + { 21,76,1,119,86,145,2,0,14,7,6,138,146,55,17,28,132,93,67,40,60,143,29,147,111,16,69,141,5,56,19,133 }, + { 1,8,108,14,7,116,64,42,10,63,94,32,115,103,113,96,30,34,55,47,95,148,29,140,129,25,134,53,69,26,19,11 }, + { 
12,1,3,5,4,2,0,7,8,38,27,16,18,6,10,20,41,40,79,46,9,23,22,88,92,37,14,24,62,19,48,99 }, + { 1,14,7,0,6,25,5,16,19,2,42,26,29,35,61,8,18,129,101,21,3,110,34,148,96,10,17,4,22,40,12,20 }, + { 0,2,5,1,3,19,22,26,16,24,29,7,14,6,4,25,18,44,8,48,12,61,20,21,10,35,65,56,23,40,17,107 }, + { 1,7,8,29,56,0,10,14,2,42,72,5,4,65,3,30,84,94,67,9,25,133,111,11,32,108,16,63,21,96,26,48 } + }; +#ifdef _MSC_VER +#pragma endregion +#endif + + static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } + //static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } + + static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } + + template inline T square(T a) { return a * a; } + + static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } + + template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } + static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } + + static inline int squarei(int a) { return a * a; } + //static inline int absi(int a) { return (a < 0) ? 
-a : a; } + + template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } + + static const uint32_t TOTAL_ORDER_4_0_16 = 15; + static const uint32_t TOTAL_ORDER_4_1_16 = 700; + static const uint32_t TOTAL_ORDER_4_2_16 = 753; + static const uint32_t TOTAL_ORDER_4_3_16 = 515; + static uint16_t g_total_ordering4_hash[4096]; + static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3]; + + static const uint32_t TOTAL_ORDER_3_0_16 = 12; + static const uint32_t TOTAL_ORDER_3_1_16 = 15; + static const uint32_t TOTAL_ORDER_3_2_16 = 89; + static uint16_t g_total_ordering3_hash[256]; + static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3]; + + struct hist4 + { + uint8_t m_hist[4]; + + hist4() + { + memset(m_hist, 0, sizeof(m_hist)); + } + + hist4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) + { + m_hist[0] = (uint8_t)i; + m_hist[1] = (uint8_t)j; + m_hist[2] = (uint8_t)k; + m_hist[3] = (uint8_t)l; + } + + inline bool operator== (const hist4& h) const + { + if (m_hist[0] != h.m_hist[0]) return false; + if (m_hist[1] != h.m_hist[1]) return false; + if (m_hist[2] != h.m_hist[2]) return false; + if (m_hist[3] != h.m_hist[3]) return false; + return true; + } + + inline bool any_16() const + { + return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16) || (m_hist[3] == 16); + } + + inline uint32_t lookup_total_ordering_index() const + { + if (m_hist[0] == 16) + return TOTAL_ORDER_4_0_16; + else if (m_hist[1] == 16) + return TOTAL_ORDER_4_1_16; + else if (m_hist[2] == 16) + return TOTAL_ORDER_4_2_16; + else if (m_hist[3] == 16) + return TOTAL_ORDER_4_3_16; + + // Must sum to 16, so m_hist[3] isn't needed. 
+ return g_total_ordering4_hash[m_hist[0] | (m_hist[1] << 4) | (m_hist[2] << 8)]; + } + }; + + struct hist3 + { + uint8_t m_hist[3]; + + hist3() + { + memset(m_hist, 0, sizeof(m_hist)); + } + + hist3(uint32_t i, uint32_t j, uint32_t k) + { + m_hist[0] = (uint8_t)i; + m_hist[1] = (uint8_t)j; + m_hist[2] = (uint8_t)k; + } + + inline bool operator== (const hist3& h) const + { + if (m_hist[0] != h.m_hist[0]) return false; + if (m_hist[1] != h.m_hist[1]) return false; + if (m_hist[2] != h.m_hist[2]) return false; + return true; + } + + inline bool any_16() const + { + return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16); + } + + inline uint32_t lookup_total_ordering_index() const + { + if (m_hist[0] == 16) + return TOTAL_ORDER_3_0_16; + else if (m_hist[1] == 16) + return TOTAL_ORDER_3_1_16; + else if (m_hist[2] == 16) + return TOTAL_ORDER_3_2_16; + + // Must sum to 16, so m_hist[2] isn't needed. + return g_total_ordering3_hash[m_hist[0] | (m_hist[1] << 4)]; + } + }; + + struct bc1_match_entry + { + uint8_t m_hi; + uint8_t m_lo; + uint8_t m_e; + }; + + static bc1_approx_mode g_bc1_approx_mode; + static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; + static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256]; + +#ifndef NDEBUG // only used in asserts + static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } + static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } +#endif + + // v0, v1 = unexpanded DXT1 endpoint values (5/6-bits) + // c0, c1 = expanded DXT1 endpoint values (8-bits) + static inline int interp_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1) / 3; } + static inline int interp_5_6_ideal_round(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1 + 1) / 3; } + static inline int interp_half_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1) / 2; } + + static inline int interp_5_nv(int v0, int v1) { assert(v0 < 32 && 
v1 < 32); return ((2 * v0 + v1) * 22) / 8; } + static inline int interp_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + (gdiff / 4) + 128 + gdiff * 80) / 256; } + + static inline int interp_half_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((v0 + v1) * 33) / 8; } + static inline int interp_half_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + gdiff / 4 + 128 + gdiff * 128) / 256; } + + static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } + static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + + static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + } + } + + static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + (void)v0; (void)v1; + assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); + default: + case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); + case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + } + } + + static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_half_5_nv(v0, v1); + case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, 
c1); + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); + } + } + + static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) + { + (void)v0; (void)v1; + assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + switch (mode) + { + case bc1_approx_mode::cBC1NVidia: return interp_half_6_nv(c0, c1); + case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + default: + return interp_half_5_6_ideal(c0, c1); + } + } + + static void prepare_bc1_single_color_table_half(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size; lo++) + { + const int lo_e = pExpand[lo]; + + for (int hi = 0; hi < size; hi++) + { + const int hi_e = pExpand[hi]; + + const int v = (size == 32) ? interp_half_5(hi, lo, hi_e, lo_e, mode) : interp_half_6(hi, lo, hi_e, lo_e, mode); + + int e = iabs(v - i); + + // We only need to factor in 3% error in BC1 ideal mode. + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) + e += (iabs(hi_e - lo_e) * 3) / 100; + + // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. + if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) + { + pTable[i].m_hi = static_cast(hi); + pTable[i].m_lo = static_cast(lo); + + assert(e <= UINT8_MAX); + pTable[i].m_e = static_cast(e); + + lowest_e = e; + } + + } // hi + } // lo + } + } + + static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) + { + for (int i = 0; i < 256; i++) + { + int lowest_e = 256; + for (int lo = 0; lo < size; lo++) + { + const int lo_e = pExpand[lo]; + + for (int hi = 0; hi < size; hi++) + { + const int hi_e = pExpand[hi]; + + const int v = (size == 32) ? 
interp_5(hi, lo, hi_e, lo_e, mode) : interp_6(hi, lo, hi_e, lo_e, mode); + + int e = iabs(v - i); + + if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) + e += (iabs(hi_e - lo_e) * 3) / 100; + + // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. + if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) + { + pTable[i].m_hi = static_cast(hi); + pTable[i].m_lo = static_cast(lo); + + assert(e <= UINT8_MAX); + pTable[i].m_e = static_cast(e); + + lowest_e = e; + } + + } // hi + } // lo + } + } + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; + + // multiplier is 4 for 3-color + static const uint32_t g_weight_vals3[3] = { 0x000004, 0x040000, 0x010101 }; + + static inline void compute_selector_factors4(const hist4& h, float& iz00, float& iz10, float& iz11) + { + uint32_t weight_accum = 0; + for (uint32_t sel = 0; sel < 4; sel++) + weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + det = 0.0f; + else + det = (3.0f / 255.0f) / det; + + iz00 = z11 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + } + + static inline void compute_selector_factors3(const hist3& h, float& iz00, float& iz10, float& iz11) + { + uint32_t weight_accum = 0; + for (uint32_t sel = 0; sel < 3; sel++) + weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + det = 0.0f; + else + det = 
(2.0f / 255.0f) / det; + + iz00 = z11 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + } + + static bool g_initialized; + + void init(bc1_approx_mode mode) + { + g_bc1_approx_mode = mode; + + uint8_t bc1_expand5[32]; + for (int i = 0; i < 32; i++) + bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); + prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, mode); + prepare_bc1_single_color_table_half(g_bc1_match5_half, bc1_expand5, 32, mode); + + uint8_t bc1_expand6[64]; + for (int i = 0; i < 64; i++) + bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); + prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, mode); + prepare_bc1_single_color_table_half(g_bc1_match6_half, bc1_expand6, 64, mode); + + for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS4; i++) + { + hist4 h; + h.m_hist[0] = (uint8_t)g_unique_total_orders4[i][0]; + h.m_hist[1] = (uint8_t)g_unique_total_orders4[i][1]; + h.m_hist[2] = (uint8_t)g_unique_total_orders4[i][2]; + h.m_hist[3] = (uint8_t)g_unique_total_orders4[i][3]; + + if (!h.any_16()) + { + const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4) | (h.m_hist[2] << 8); + assert(index < 4096); + g_total_ordering4_hash[index] = (uint16_t)i; + } + + compute_selector_factors4(h, g_selector_factors4[i][0], g_selector_factors4[i][1], g_selector_factors4[i][2]); + } + + for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS3; i++) + { + hist3 h; + h.m_hist[0] = (uint8_t)g_unique_total_orders3[i][0]; + h.m_hist[1] = (uint8_t)g_unique_total_orders3[i][1]; + h.m_hist[2] = (uint8_t)g_unique_total_orders3[i][2]; + + if (!h.any_16()) + { + const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4); + assert(index < 256); + g_total_ordering3_hash[index] = (uint16_t)i; + } + + compute_selector_factors3(h, g_selector_factors3[i][0], g_selector_factors3[i][1], g_selector_factors3[i][2]); + } + + g_initialized = true; + } + + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb, bool allow_3color) + { + 
bc1_block* pDst_block = static_cast(pDst); + + uint32_t mask = 0xAA; + int max16 = -1, min16 = 0; + + if (allow_3color) + { + const uint32_t err4 = g_bc1_match5_equals_1[fr].m_e + g_bc1_match6_equals_1[fg].m_e + g_bc1_match5_equals_1[fb].m_e; + const uint32_t err3 = g_bc1_match5_half[fr].m_e + g_bc1_match6_half[fg].m_e + g_bc1_match5_half[fb].m_e; + + if (err3 < err4) + { + max16 = (g_bc1_match5_half[fr].m_hi << 11) | (g_bc1_match6_half[fg].m_hi << 5) | g_bc1_match5_half[fb].m_hi; + min16 = (g_bc1_match5_half[fr].m_lo << 11) | (g_bc1_match6_half[fg].m_lo << 5) | g_bc1_match5_half[fb].m_lo; + + if (max16 > min16) + std::swap(max16, min16); + } + } + + if (max16 == -1) + { + max16 = (g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; + min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; + + if (min16 == max16) + { + // Always forbid 3 color blocks + // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. 
+ mask = 0; + + // Make l > h + if (min16 > 0) + min16--; + else + { + // l = h = 0 + assert(min16 == max16 && max16 == 0); + + max16 = 1; + min16 = 0; + mask = 0x55; + } + + assert(max16 > min16); + } + + if (max16 < min16) + { + std::swap(max16, min16); + mask ^= 0x55; + } + } + + pDst_block->set_low_color(static_cast(max16)); + pDst_block->set_high_color(static_cast(min16)); + pDst_block->m_selectors[0] = static_cast(mask); + pDst_block->m_selectors[1] = static_cast(mask); + pDst_block->m_selectors[2] = static_cast(mask); + pDst_block->m_selectors[3] = static_cast(mask); + } + + static const float g_midpoint5[32] = { .015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f, .370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f, .725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f }; + static const float g_midpoint6[64] = { .007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f, .180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f, .356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f, .533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f, .705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f, .882353f, .898039f, .913725f, .929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f }; + + struct vec3F { float c[3]; }; + + static inline void compute_least_squares_endpoints4_rgb( + vec3F* pXl, vec3F* pXh, + int total_r, int total_g, int total_b, + float iz00, float iz10, float iz11, + uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) + { + const float iz01 = iz10; + + const uint32_t f1 = 
g_unique_total_orders4[s][0]; + const uint32_t f2 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1]; + const uint32_t f3 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1] + g_unique_total_orders4[s][2]; + uint32_t uq00_r = (r_sum[f2] - r_sum[f1]) + (r_sum[f3] - r_sum[f2]) * 2 + (r_sum[16] - r_sum[f3]) * 3; + uint32_t uq00_g = (g_sum[f2] - g_sum[f1]) + (g_sum[f3] - g_sum[f2]) * 2 + (g_sum[16] - g_sum[f3]) * 3; + uint32_t uq00_b = (b_sum[f2] - b_sum[f1]) + (b_sum[f3] - b_sum[f2]) * 2 + (b_sum[16] - b_sum[f3]) * 3; + + float q10_r = (float)(total_r * 3 - uq00_r); + float q10_g = (float)(total_g * 3 - uq00_g); + float q10_b = (float)(total_b * 3 - uq00_b); + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + } + + static inline bool compute_least_squares_endpoints4_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh, int total_r, int total_g, int total_b) + { + uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; + uint32_t weight_accum = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + const uint8_t sel = pSelectors[i]; + + weight_accum += g_weight_vals4[sel]; + uq00_r += sel * r; + uq00_g += sel * g; + uq00_b += sel * b; + } + + int q10_r = total_r * 3 - uq00_r; + int q10_g = total_g * 3 - uq00_g; + int q10_b = total_b * 3 - uq00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = (3.0f / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 
* det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + + return true; + } + + static inline void compute_least_squares_endpoints3_rgb( + vec3F* pXl, vec3F* pXh, + int total_r, int total_g, int total_b, + float iz00, float iz10, float iz11, + uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) + { + const float iz01 = iz10; + + // Compensates for BC1 3-color ordering, which is selector 0, 2, 1 + const uint32_t f1 = g_unique_total_orders3[s][0]; + const uint32_t f2 = g_unique_total_orders3[s][0] + g_unique_total_orders3[s][2]; + uint32_t uq00_r = (r_sum[16] - r_sum[f2]) * 2 + (r_sum[f2] - r_sum[f1]); + uint32_t uq00_g = (g_sum[16] - g_sum[f2]) * 2 + (g_sum[f2] - g_sum[f1]); + uint32_t uq00_b = (b_sum[16] - b_sum[f2]) * 2 + (b_sum[f2] - b_sum[f1]); + + float q10_r = (float)(total_r * 2 - uq00_r); + float q10_g = (float)(total_g * 2 - uq00_g); + float q10_b = (float)(total_b * 2 - uq00_b); + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + } + + static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) + { + int uq00_r = 0, uq00_g = 0, uq00_b = 0; + uint32_t weight_accum = 0; + int total_r = 0, total_g = 0, total_b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; + if (use_black) + { + if ((r | g | b) < 4) + continue; + } + + const 
uint8_t sel = pSelectors[i]; + assert(sel <= 3); + if (sel == 3) + continue; + + weight_accum += g_weight_vals3[sel]; + + static const uint8_t s_tran[3] = { 0, 2, 1 }; + const uint8_t tsel = s_tran[sel]; + uq00_r += tsel * r; + uq00_g += tsel * g; + uq00_b += tsel * b; + + total_r += r; + total_g += g; + total_b += b; + } + + int q10_r = total_r * 2 - uq00_r; + int q10_g = total_g * 2 - uq00_g; + int q10_b = total_b * 2 - uq00_b; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) + return false; + + det = (2.0f / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; + pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; + + pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; + pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; + + pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; + pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; + + return true; + } + + static inline void bc1_get_block_colors4(uint32_t block_r[4], uint32_t block_g[4], uint32_t block_b[4], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) + { + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | (hb >> 2); + + if (g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) + { + block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4) + 
{ + block_r[1] = (block_r[0] * 2 + block_r[3] + 1) / 3; block_g[1] = (block_g[0] * 2 + block_g[3] + 1) / 3; block_b[1] = (block_b[0] * 2 + block_b[3] + 1) / 3; + block_r[2] = (block_r[3] * 2 + block_r[0] + 1) / 3; block_g[2] = (block_g[3] * 2 + block_g[0] + 1) / 3; block_b[2] = (block_b[3] * 2 + block_b[0] + 1) / 3; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) + { + block_r[1] = interp_5_6_amd(block_r[0], block_r[3]); block_g[1] = interp_5_6_amd(block_g[0], block_g[3]); block_b[1] = interp_5_6_amd(block_b[0], block_b[3]); + block_r[2] = interp_5_6_amd(block_r[3], block_r[0]); block_g[2] = interp_5_6_amd(block_g[3], block_g[0]); block_b[2] = interp_5_6_amd(block_b[3], block_b[0]); + } + else + { + block_r[1] = interp_5_nv(lr, hr); block_g[1] = interp_6_nv(block_g[0], block_g[3]); block_b[1] = interp_5_nv(lb, hb); + block_r[2] = interp_5_nv(hr, lr); block_g[2] = interp_6_nv(block_g[3], block_g[0]); block_b[2] = interp_5_nv(hb, lb); + } + } + + static inline void bc1_get_block_colors3(uint32_t block_r[3], uint32_t block_g[3], uint32_t block_b[3], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) + { + block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); + block_r[1] = (hr << 3) | (hr >> 2); block_g[1] = (hg << 2) | (hg >> 4); block_b[1] = (hb << 3) | (hb >> 2); + + if ((g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) || (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4)) + { + block_r[2] = (block_r[0] + block_r[1]) / 2; block_g[2] = (block_g[0] + block_g[1]) / 2; block_b[2] = (block_b[0] + block_b[1]) / 2; + } + else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) + { + block_r[2] = interp_half_5_6_amd(block_r[0], block_r[1]); block_g[2] = interp_half_5_6_amd(block_g[0], block_g[1]); block_b[2] = interp_half_5_6_amd(block_b[0], block_b[1]); + } + else + { + block_r[2] = interp_half_5_nv(lr, hr); block_g[2] = interp_half_6_nv(block_g[0], block_g[1]); block_b[2] 
= interp_half_5_nv(lb, hb); + } + } + + static inline void bc1_find_sels4_noerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], const uint8_t* pForce_selectors) + { + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + return; + } + + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; + + sels[i + 0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + sels[i + 1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + sels[i + 2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + sels[i + 3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + } + } + + static inline uint32_t bc1_find_sels4_fasterr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; + + int dots[4]; + for (uint32_t i = 0; i < 4; 
i++) + dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; + + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; + + ar *= 2; ag *= 2; ab *= 2; + + static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i += 4) + { + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; + + uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; + uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; + uint8_t sel2 = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; + uint8_t sel3 = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; + + sels[i + 0] = sel0; + sels[i + 1] = sel1; + sels[i + 2] = sel2; + sels[i + 3] = sel3; + + total_err += squarei(pSrc_pixels[i + 0].r - block_r[sel0]) + squarei(pSrc_pixels[i + 0].g - block_g[sel0]) + squarei(pSrc_pixels[i + 0].b - block_b[sel0]); + total_err += squarei(pSrc_pixels[i + 1].r - block_r[sel1]) + squarei(pSrc_pixels[i + 1].g - block_g[sel1]) + squarei(pSrc_pixels[i + 1].b - block_b[sel1]); + total_err += squarei(pSrc_pixels[i + 2].r - block_r[sel2]) + squarei(pSrc_pixels[i + 2].g - block_g[sel2]) + squarei(pSrc_pixels[i + 2].b - block_b[sel2]); + total_err += squarei(pSrc_pixels[i + 3].r - block_r[sel3]) + squarei(pSrc_pixels[i + 3].g - block_g[sel3]) + squarei(pSrc_pixels[i + 3].b - block_b[sel3]); + + if (total_err >= cur_err) + break; + } + + return total_err; + } + + static inline uint32_t bc1_find_sels4_check2_err(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], 
block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + int dr = block_r[3] - block_r[0], dg = block_g[3] - block_g[0], db = block_b[3] - block_b[0]; + + const float f = 4.0f / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f); + sel = clampi(sel, 1, 3); + + uint32_t err0 = squarei((int)block_r[sel - 1] - (int)r) + squarei((int)block_g[sel - 1] - (int)g) + squarei((int)block_b[sel - 1] - (int)b); + uint32_t err1 = squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + + int best_sel = sel; + uint32_t best_err = err1; + if (err0 == err1) + { + // Prefer non-interpolation + if ((best_sel - 1) == 0) + best_sel = 0; + } + else if (err0 < best_err) + { + best_sel = sel - 1; + best_err = err0; + } + + total_err += best_err; + + if (total_err >= cur_err) + break; + + sels[i] = (uint8_t)best_sel; + } + return total_err; + } + + static inline uint32_t bc1_find_sels4_fullerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + uint32_t total_err = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); + uint8_t best_sel = 0; + + for (uint32_t j = 1; (j < 4) && best_err; j++) + { + uint32_t err = squarei((int)block_r[j] - (int)r) + 
squarei((int)block_g[j] - (int)g) + squarei((int)block_b[j] - (int)b); + if ((err < best_err) || ((err == best_err) && (j == 3))) + { + best_err = err; + best_sel = (uint8_t)j; + } + } + + total_err += best_err; + + if (total_err >= cur_err) + break; + + sels[i] = (uint8_t)best_sel; + } + return total_err; + } + + static inline uint32_t bc1_find_sels4(uint32_t flags, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err, const uint8_t* pForce_selectors) + { + uint32_t err; + + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + err = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + const uint32_t sel = pForce_selectors[i]; + assert(sel <= 3); + + err += squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + } + } + else + { + if (flags & cEncodeBC1UseFasterMSEEval) + err = bc1_find_sels4_fasterr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + else if (flags & cEncodeBC1UseFullMSEEval) + err = bc1_find_sels4_fullerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + else + err = bc1_find_sels4_check2_err(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); + } + + return err; + } + + static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err, const uint8_t* pForce_selectors) + { + uint32_t block_r[4], block_g[4], block_b[4]; + bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + uint32_t total_err = 0; + + if (pForce_selectors) + { + memcpy(sels, pForce_selectors, 16); + + //uint32_t block_r[4], block_g[4], 
block_b[4]; + //bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); + + block_r[3] = 0; block_g[3] = 0; block_b[3] = 0; + + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + const uint32_t sel = pForce_selectors[i]; + assert(sel <= 3); + + total_err += squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); + } + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; + + uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); + uint32_t best_sel = 0; + + uint32_t err1 = squarei((int)block_r[1] - (int)r) + squarei((int)block_g[1] - (int)g) + squarei((int)block_b[1] - (int)b); + if (err1 < best_err) + { + best_err = err1; + best_sel = 1; + } + + uint32_t err2 = squarei((int)block_r[2] - (int)r) + squarei((int)block_g[2] - (int)g) + squarei((int)block_b[2] - (int)b); + if (err2 < best_err) + { + best_err = err2; + best_sel = 2; + } + + if (use_black) + { + uint32_t err3 = squarei(r) + squarei(g) + squarei(b); + if (err3 < best_err) + { + best_err = err3; + best_sel = 3; + } + } + + total_err += best_err; + if (total_err >= cur_err) + return total_err; + + sels[i] = (uint8_t)best_sel; + } + } + + return total_err; + } + + static inline void precise_round_565(const vec3F& xl, const vec3F& xh, + int& trial_lr, int& trial_lg, int& trial_lb, + int& trial_hr, int& trial_hg, int& trial_hb) + { + trial_lr = (int)(xl.c[0] * 31.0f); + trial_lg = (int)(xl.c[1] * 63.0f); + trial_lb = (int)(xl.c[2] * 31.0f); + + trial_hr = (int)(xh.c[0] * 31.0f); + trial_hg = (int)(xh.c[1] * 63.0f); + trial_hb = (int)(xh.c[2] * 31.0f); + + if ((uint32_t)(trial_lr | trial_lb | trial_hr | trial_hb) > 31U) + { + trial_lr = ((uint32_t)trial_lr > 31U) ? 
(~trial_lr >> 31) & 31 : trial_lr; + trial_hr = ((uint32_t)trial_hr > 31U) ? (~trial_hr >> 31) & 31 : trial_hr; + + trial_lb = ((uint32_t)trial_lb > 31U) ? (~trial_lb >> 31) & 31 : trial_lb; + trial_hb = ((uint32_t)trial_hb > 31U) ? (~trial_hb >> 31) & 31 : trial_hb; + } + + if ((uint32_t)(trial_lg | trial_hg) > 63U) + { + trial_lg = ((uint32_t)trial_lg > 63U) ? (~trial_lg >> 31) & 63 : trial_lg; + trial_hg = ((uint32_t)trial_hg > 63U) ? (~trial_hg >> 31) & 63 : trial_hg; + } + + trial_lr = (trial_lr + (xl.c[0] > g_midpoint5[trial_lr])) & 31; + trial_lg = (trial_lg + (xl.c[1] > g_midpoint6[trial_lg])) & 63; + trial_lb = (trial_lb + (xl.c[2] > g_midpoint5[trial_lb])) & 31; + + trial_hr = (trial_hr + (xh.c[0] > g_midpoint5[trial_hr])) & 31; + trial_hg = (trial_hg + (xh.c[1] > g_midpoint6[trial_hg])) & 63; + trial_hb = (trial_hb + (xh.c[2] > g_midpoint5[trial_hb])) & 31; + } + + static inline void precise_round_565_noscale(vec3F xl, vec3F xh, + int& trial_lr, int& trial_lg, int& trial_lb, + int& trial_hr, int& trial_hg, int& trial_hb) + { + xl.c[0] *= 1.0f / 255.0f; + xl.c[1] *= 1.0f / 255.0f; + xl.c[2] *= 1.0f / 255.0f; + + xh.c[0] *= 1.0f / 255.0f; + xh.c[1] *= 1.0f / 255.0f; + xh.c[2] *= 1.0f / 255.0f; + + precise_round_565(xl, xh, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb); + } + + static inline void bc1_encode4(bc1_block* pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) + { + uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); + + // Always forbid 3 color blocks + if (lc16 == hc16) + { + uint8_t mask = 0; + + // Make l > h + if (hc16 > 0) + hc16--; + else + { + // lc16 = hc16 = 0 + assert(lc16 == hc16 && hc16 == 0); + + hc16 = 0; + lc16 = 1; + mask = 0x55; // select hc16 + } + + assert(lc16 > hc16); + pDst_block->set_low_color(static_cast(lc16)); + pDst_block->set_high_color(static_cast(hc16)); + + pDst_block->m_selectors[0] = mask; + 
pDst_block->m_selectors[1] = mask; + pDst_block->m_selectors[2] = mask; + pDst_block->m_selectors[3] = mask; + } + else + { + uint8_t invert_mask = 0; + if (lc16 < hc16) + { + std::swap(lc16, hc16); + invert_mask = 0x55; + } + + assert(lc16 > hc16); + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); + + pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + } + } + + static inline void bc1_encode3(bc1_block* pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) + { + uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); + uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); + + bool invert_flag = false; + if (lc16 > hc16) + { + std::swap(lc16, hc16); + invert_flag = true; + } + + assert(lc16 <= hc16); + + pDst_block->set_low_color((uint16_t)lc16); + pDst_block->set_high_color((uint16_t)hc16); + + uint32_t packed_sels = 0; + + if (invert_flag) + { + static const uint8_t s_sel_trans_inv[4] = { 1, 0, 2, 3 }; + + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); + } + else + { + for (uint32_t i = 0; i < 16; i++) + packed_sels |= ((uint32_t)sels[i] << (i * 2)); + } + + pDst_block->m_selectors[0] = (uint8_t)packed_sels; + pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8); + pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16); + pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24); + } + + struct bc1_encode_results + { + int lr, lg, lb; + int hr, hg, hb; + uint8_t sels[16]; + bool m_3color; + }; + + static bool 
try_3color_block_useblack(const color32* pSrc_pixels, uint32_t flags, uint32_t& cur_err, bc1_encode_results& results, const uint8_t* pForce_selectors) + { + int total_r = 0, total_g = 0, total_b = 0; + int max_r = 0, max_g = 0, max_b = 0; + int min_r = 255, min_g = 255, min_b = 255; + int total_pixels = 0; + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + if ((r | g | b) < 4) + continue; + + max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); + min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); + total_r += r; total_g += g; total_b += b; + + total_pixels++; + } + + if (!total_pixels) + return false; + + int half_total_pixels = total_pixels >> 1; + int avg_r = (total_r + half_total_pixels) / total_pixels; + int avg_g = (total_g + half_total_pixels) / total_pixels; + int avg_b = (total_b + half_total_pixels) / total_pixels; + + uint32_t low_c = 0, high_c = 0; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r; + int g = (int)pSrc_pixels[i].g; + int b = (int)pSrc_pixels[i].b; + + if ((r | g | b) < 4) + continue; + + r -= avg_r; + g -= avg_g; + b -= avg_b; + + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + + if (icov[2] < 0) + xr = -xr; + + if (icov[4] < 0) + xg = -xg; + + for (uint32_t power_iter = 0; power_iter < 4; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + int saxis_r = 
306, saxis_g = 601, saxis_b = 117; + if (k >= 2) + { + float m = 1024.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } + + int low_dot = INT_MAX, high_dot = INT_MIN; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b; + + if ((r | g | b) < 4) + continue; + + int dot = r * saxis_r + g * saxis_g + b * saxis_b; + if (dot < low_dot) + { + low_dot = dot; + low_c = i; + } + if (dot > high_dot) + { + high_dot = dot; + high_c = i; + } + } + + int lr = to_5(pSrc_pixels[low_c].r); + int lg = to_6(pSrc_pixels[low_c].g); + int lb = to_5(pSrc_pixels[low_c].b); + + int hr = to_5(pSrc_pixels[high_c].r); + int hg = to_6(pSrc_pixels[high_c].g); + int hb = to_5(pSrc_pixels[high_c].b); + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX, pForce_selectors); + + if (trial_err) + { + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 
2 : 1; + for (uint32_t trials = 0; trials < total_ls_passes; trials++) + { + vec3F xl, xh; + int lr2, lg2, lb2, hr2, hg2, hb2; + if (!compute_least_squares_endpoints3_rgb(true, pSrc_pixels, trial_sels, &xl, &xh)) + { + lr2 = g_bc1_match5_half[avg_r].m_hi; + lg2 = g_bc1_match6_half[avg_g].m_hi; + lb2 = g_bc1_match5_half[avg_b].m_hi; + + hr2 = g_bc1_match5_half[avg_r].m_lo; + hg2 = g_bc1_match6_half[avg_g].m_lo; + hb2 = g_bc1_match5_half[avg_b].m_lo; + } + else + { + precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); + } + + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) + break; + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(true, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + lr = lr2; lg = lg2; lb = lb2; + hr = hr2; hg = hg2; hb = hb2; + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + else + break; + } + } + + if (trial_err < cur_err) + { + results.m_3color = true; + results.lr = lr; + results.lg = lg; + results.lb = lb; + results.hr = hr; + results.hg = hg; + results.hb = hb; + memcpy(results.sels, trial_sels, 16); + + cur_err = trial_err; + + return true; + } + + return false; + } + + static bool try_3color_block(const color32* pSrc_pixels, uint32_t flags, uint32_t& cur_err, + int avg_r, int avg_g, int avg_b, int lr, int lg, int lb, int hr, int hg, int hb, int total_r, int total_g, int total_b, uint32_t total_orderings_to_try, + bc1_encode_results& results, const uint8_t* pForce_selectors) + { + if (pForce_selectors) + { + for (uint32_t i = 0; i < 16; i++) + if (pForce_selectors[i] == 3) + return false; + } + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels3_fullerr(false, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX, pForce_selectors); + + if (trial_err) + { + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 
2 : 1; + for (uint32_t trials = 0; trials < total_ls_passes; trials++) + { + vec3F xl, xh; + int lr2, lg2, lb2, hr2, hg2, hb2; + if (!compute_least_squares_endpoints3_rgb(false, pSrc_pixels, trial_sels, &xl, &xh)) + { + lr2 = g_bc1_match5_half[avg_r].m_hi; + lg2 = g_bc1_match6_half[avg_g].m_hi; + lb2 = g_bc1_match5_half[avg_b].m_hi; + + hr2 = g_bc1_match5_half[avg_r].m_lo; + hg2 = g_bc1_match6_half[avg_g].m_lo; + hb2 = g_bc1_match5_half[avg_b].m_lo; + } + else + { + precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); + } + + if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) + break; + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + lr = lr2; lg = lg2; lb = lb2; + hr = hr2; hg = hg2; hb = hb2; + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + else + break; + } + } + + if ((trial_err) && (flags & cEncodeBC1UseLikelyTotalOrderings) && (total_orderings_to_try)) + { + hist3 h; + for (uint32_t i = 0; i < 16; i++) + { + assert(trial_sels[i] < 3); + h.m_hist[trial_sels[i]]++; + } + + const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); + + int r0, g0, b0, r3, g3, b3; + r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); + r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); + + int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; + + int dots[16]; + for (uint32_t i = 0; i < 16; i++) + { + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; + int d = 0x1000000 + (r * ar + g * ag + b * ab); + assert(d >= 0); + dots[i] = (d << 4) + i; + } + + std::sort(dots, dots + 16); + + uint32_t r_sum[17], g_sum[17], b_sum[17]; + uint32_t r = 0, g = 0, b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t p = dots[i] & 15; + + r_sum[i] = 
r; + g_sum[i] = g; + b_sum[i] = b; + + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; + } + + r_sum[16] = total_r; + g_sum[16] = total_g; + b_sum[16] = total_b; + + const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS3 : std::min(total_orderings_to_try, MAX_TOTAL_ORDERINGS3); + for (uint32_t q = 0; q < q_total; q++) + { + const uint32_t s = (flags & cEncodeBC1Exhaustive) ? q : g_best_total_orderings3[orig_total_order_index][q]; + + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + + if ((s == TOTAL_ORDER_3_0_16) || (s == TOTAL_ORDER_3_1_16) || (s == TOTAL_ORDER_3_2_16)) + { + trial_lr = g_bc1_match5_half[avg_r].m_hi; + trial_lg = g_bc1_match6_half[avg_g].m_hi; + trial_lb = g_bc1_match5_half[avg_b].m_hi; + + trial_hr = g_bc1_match5_half[avg_r].m_lo; + trial_hg = g_bc1_match6_half[avg_g].m_lo; + trial_hb = g_bc1_match5_half[avg_b].m_lo; + } + else + { + compute_least_squares_endpoints3_rgb(&xl, &xh, total_r, total_g, total_b, + g_selector_factors3[s][0], g_selector_factors3[s][1], g_selector_factors3[s][2], s, r_sum, g_sum, b_sum); + + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + uint8_t trial_sels2[16]; + uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels2, UINT32_MAX, pForce_selectors); + + if (trial_err2 < trial_err) + { + trial_err = trial_err2; + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); + } + + } // s + } + + if (trial_err < cur_err) + { + results.m_3color = true; + results.lr = lr; + results.lg = lg; + results.lb = lb; + results.hr = hr; + results.hg = hg; + results.hb = hb; + memcpy(results.sels, trial_sels, 16); + + cur_err = trial_err; + + return true; + } + + return false; + } + + void encode_bc1(uint32_t level, 
void* pDst, const uint8_t* pPixels, bool allow_3color, bool allow_transparent_texels_for_black, const uint8_t* pForce_selectors) + { + uint32_t flags = 0, total_orderings4 = 1, total_orderings3 = 1; + + static_assert(MAX_TOTAL_ORDERINGS3 >= 32, "MAX_TOTAL_ORDERINGS3 >= 32"); + static_assert(MAX_TOTAL_ORDERINGS4 >= 32, "MAX_TOTAL_ORDERINGS4 >= 32"); + + switch (level) + { + case 0: + // Faster/higher quality than stb_dxt default. + flags = cEncodeBC1BoundingBoxInt; + break; + case 1: + // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. + flags = cEncodeBC1Use2DLS; + break; + case 2: + // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). + // Slightly stronger than stb_dxt. + flags = 0; + break; + case 3: + // Slightly stronger than stb_dxt HIGHQUAL. + flags = cEncodeBC1TwoLeastSquaresPasses; + break; + case 4: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; + break; + default: + case 5: + // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 6: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + break; + case 7: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 4; + break; + case 8: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 8; + break; + case 9: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 11; + total_orderings3 = 3; + break; + case 10: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 20; + total_orderings3 = 8; + break; + case 11: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 28; + total_orderings3 = 16; + break; + case 12: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 13: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 14: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + case 15: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); + total_orderings3 = 32; + break; + case 16: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; + total_orderings3 = 32; + break; + case 17: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 18: + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = MAX_TOTAL_ORDERINGS4; + total_orderings3 = 32; + break; + case 19: + // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. + flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; + flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); + total_orderings4 = 32; + total_orderings3 = 32; + break; + } + + encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3, pForce_selectors); + } + + static inline void encode_bc1_pick_initial(const color32* pSrc_pixels, uint32_t flags, bool grayscale_flag, + int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, + int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, + int& lr, int& lg, int& lb, int& hr, int& hg, int& hb) + { + if (grayscale_flag) + { + const int fr = pSrc_pixels[0].r; + + // Grayscale blocks are a common enough case to specialize. 
+ if ((max_r - min_r) < 2) + { + lr = lb = hr = hb = to_5(fr); + lg = hg = to_6(fr); + } + else + { + lr = lb = to_5(min_r); + lg = to_6(min_r); + + hr = hb = to_5(max_r); + hg = to_6(max_r); + } + } + else if (flags & cEncodeBC1Use2DLS) + { + // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. + int big_chan = 0, min_chan_val = min_r, max_chan_val = max_r; + if ((max_g - min_g) > (max_chan_val - min_chan_val)) + big_chan = 1, min_chan_val = min_g, max_chan_val = max_g; + + if ((max_b - min_b) > (max_chan_val - min_chan_val)) + big_chan = 2, min_chan_val = min_b, max_chan_val = max_b; + + int sum_xy_r = 0, sum_xy_g = 0, sum_xy_b = 0; + vec3F l, h; + if (big_chan == 0) + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += r * r, sum_xy_g += r * g, sum_xy_b += r * b; + } + + int sum_x = total_r; + int sum_x2 = sum_xy_r; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_y = 0.0f, b_z = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_y = (16 * sum_xy_g - sum_x * total_g) * div; + b_z = (16 * sum_xy_b - sum_x * total_b) * div; + } + + float a_y = (total_g - b_y * sum_x) / 16.0f; + float a_z = (total_b - b_z * sum_x) / 16.0f; + + l.c[1] = a_y + b_y * min_chan_val; + l.c[2] = a_z + b_z * min_chan_val; + + h.c[1] = a_y + b_y * max_chan_val; + h.c[2] = a_z + b_z * max_chan_val; + + float dg = (h.c[1] - l.c[1]); + float db = (h.c[2] - l.c[2]); + + h.c[1] = l.c[1] + dg * (15.0f / 16.0f); + h.c[2] = l.c[2] + db * (15.0f / 16.0f); + + l.c[1] = l.c[1] + dg * (1.0f / 16.0f); + l.c[2] = l.c[2] + db * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[0] = fmin_chan_val; + h.c[0] = fmax_chan_val; + } + else if (big_chan == 1) + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = 
pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += g * r, sum_xy_g += g * g, sum_xy_b += g * b; + } + + int sum_x = total_g; + int sum_x2 = sum_xy_g; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_x = 0.0f, b_z = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_x = (16 * sum_xy_r - sum_x * total_r) * div; + b_z = (16 * sum_xy_b - sum_x * total_b) * div; + } + + float a_x = (total_r - b_x * sum_x) / 16.0f; + float a_z = (total_b - b_z * sum_x) / 16.0f; + + l.c[0] = a_x + b_x * min_chan_val; + l.c[2] = a_z + b_z * min_chan_val; + + h.c[0] = a_x + b_x * max_chan_val; + h.c[2] = a_z + b_z * max_chan_val; + + float dr = (h.c[0] - l.c[0]); + float db = (h.c[2] - l.c[2]); + + h.c[0] = l.c[0] + dr * (15.0f / 16.0f); + h.c[2] = l.c[2] + db * (15.0f / 16.0f); + + l.c[0] = l.c[0] + dr * (1.0f / 16.0f); + l.c[2] = l.c[2] + db * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[1] = fmin_chan_val; + h.c[1] = fmax_chan_val; + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + sum_xy_r += b * r, sum_xy_g += b * g, sum_xy_b += b * b; + } + + int sum_x = total_b; + int sum_x2 = sum_xy_b; + + float div = (float)(16 * sum_x2 - sum_x * sum_x); + float b_x = 0.0f, b_y = 0.0f; + if (fabs(div) > 1e-8f) + { + div = 1.0f / div; + b_x = (16 * sum_xy_r - sum_x * total_r) * div; + b_y = (16 * sum_xy_g - sum_x * total_g) * div; + } + + float a_x = (total_r - b_x * sum_x) / 16.0f; + float a_y = (total_g - b_y * sum_x) / 16.0f; + + l.c[0] = a_x + b_x * min_chan_val; + l.c[1] = a_y + b_y * min_chan_val; + + h.c[0] = a_x + b_x * max_chan_val; + h.c[1] = a_y + b_y * max_chan_val; + + float dr = (h.c[0] - l.c[0]); + float dg = (h.c[1] - l.c[1]); + + h.c[0] = l.c[0] + dr * (15.0f / 16.0f); + h.c[1] = l.c[1] + dg * (15.0f / 
16.0f); + + l.c[0] = l.c[0] + dr * (1.0f / 16.0f); + l.c[1] = l.c[1] + dg * (1.0f / 16.0f); + + float d = (float)(max_chan_val - min_chan_val); + float fmin_chan_val = min_chan_val + d * (1.0f / 16.0f); + float fmax_chan_val = min_chan_val + d * (15.0f / 16.0f); + + l.c[2] = fmin_chan_val; + h.c[2] = fmax_chan_val; + } + + precise_round_565_noscale(l, h, lr, lg, lb, hr, hg, hb); + } + else if (flags & cEncodeBC1BoundingBox) + { + // Algorithm from icbc.h compress_dxt1_fast() + vec3F l, h; + l.c[0] = min_r * (1.0f / 255.0f); + l.c[1] = min_g * (1.0f / 255.0f); + l.c[2] = min_b * (1.0f / 255.0f); + + h.c[0] = max_r * (1.0f / 255.0f); + h.c[1] = max_g * (1.0f / 255.0f); + h.c[2] = max_b * (1.0f / 255.0f); + + const float bias = 8.0f / 255.0f; + float inset_r = (h.c[0] - l.c[0] - bias) * (1.0f / 16.0f); + float inset_g = (h.c[1] - l.c[1] - bias) * (1.0f / 16.0f); + float inset_b = (h.c[2] - l.c[2] - bias) * (1.0f / 16.0f); + + l.c[0] = clampf(l.c[0] + inset_r, 0.0f, 1.0f); + l.c[1] = clampf(l.c[1] + inset_g, 0.0f, 1.0f); + l.c[2] = clampf(l.c[2] + inset_b, 0.0f, 1.0f); + + h.c[0] = clampf(h.c[0] - inset_r, 0.0f, 1.0f); + h.c[1] = clampf(h.c[1] - inset_g, 0.0f, 1.0f); + h.c[2] = clampf(h.c[2] - inset_b, 0.0f, 1.0f); + + int icov_xz = 0, icov_yz = 0; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov_xz += r * b; + icov_yz += g * b; + } + + if (icov_xz < 0) + std::swap(l.c[0], h.c[0]); + + if (icov_yz < 0) + std::swap(l.c[1], h.c[1]); + + precise_round_565(l, h, lr, lg, lb, hr, hg, hb); + } + else if (flags & cEncodeBC1BoundingBoxInt) + { + // Algorithm from icbc.h compress_dxt1_fast(), but converted to integer. 
+ int inset_r = (max_r - min_r - 8) >> 4; + int inset_g = (max_g - min_g - 8) >> 4; + int inset_b = (max_b - min_b - 8) >> 4; + + min_r += inset_r; + min_g += inset_g; + min_b += inset_b; + if ((uint32_t)(min_r | min_g | min_b) > 255U) + { + min_r = clampi(min_r, 0, 255); + min_g = clampi(min_g, 0, 255); + min_b = clampi(min_b, 0, 255); + } + + max_r -= inset_r; + max_g -= inset_g; + max_b -= inset_b; + if ((uint32_t)(max_r | max_g | max_b) > 255U) + { + max_r = clampi(max_r, 0, 255); + max_g = clampi(max_g, 0, 255); + max_b = clampi(max_b, 0, 255); + } + + int icov_xz = 0, icov_yz = 0; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov_xz += r * b; + icov_yz += g * b; + } + + int x0 = min_r; + int y0 = min_g; + int x1 = max_r; + int y1 = max_g; + + if (icov_xz < 0) + std::swap(x0, x1); + + if (icov_yz < 0) + std::swap(y0, y1); + + lr = to_5(x0); + lg = to_6(y0); + lb = to_5(min_b); + + hr = to_5(x1); + hg = to_6(y1); + hb = to_5(max_b); + } + else + { + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) + uint32_t low_c = 0, high_c = 0; + + int icov[6] = { 0, 0, 0, 0, 0, 0 }; + for (uint32_t i = 0; i < 16; i++) + { + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; + icov[0] += r * r; + icov[1] += r * g; + icov[2] += r * b; + icov[3] += g * g; + icov[4] += g * b; + icov[5] += b * b; + } + + int saxis_r = 306, saxis_g = 601, saxis_b = 117; + + float xr = (float)(max_r - min_r); + float xg = (float)(max_g - min_g); + float xb = (float)(max_b - min_b); + + if (icov[2] < 0) + xr = -xr; + + if (icov[4] < 0) + xg = -xg; + + float cov[6]; + for (uint32_t i = 0; i < 6; i++) + cov[i] = (float)(icov[i]) * (1.0f / 255.0f); + + const uint32_t total_power_iters = (flags & cEncodeBC1Use6PowerIters) ? 
6 : 4; + for (uint32_t power_iter = 0; power_iter < total_power_iters; power_iter++) + { + float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; + float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; + float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; + xr = r; xg = g; xb = b; + } + + float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); + if (k >= 2) + { + float m = 2048.0f / k; + saxis_r = (int)(xr * m); + saxis_g = (int)(xg * m); + saxis_b = (int)(xb * m); + } + + int low_dot = INT_MAX, high_dot = INT_MIN; + + saxis_r = (int)((uint32_t)saxis_r << 4U); + saxis_g = (int)((uint32_t)saxis_g << 4U); + saxis_b = (int)((uint32_t)saxis_b << 4U); + + for (uint32_t i = 0; i < 16; i += 4) + { + int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i; + int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1; + int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2; + int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3; + + int min_d01 = std::min(dot0, dot1); + int max_d01 = std::max(dot0, dot1); + + int min_d23 = std::min(dot2, dot3); + int max_d23 = std::max(dot2, dot3); + + int min_d = std::min(min_d01, min_d23); + int max_d = std::max(max_d01, max_d23); + + low_dot = std::min(low_dot, min_d); + high_dot = std::max(high_dot, max_d); + } + low_c = low_dot & 15; + high_c = high_dot & 15; + + lr = to_5(pSrc_pixels[low_c].r); + lg = to_6(pSrc_pixels[low_c].g); + lb = to_5(pSrc_pixels[low_c].b); + + hr = to_5(pSrc_pixels[high_c].r); + hg = to_6(pSrc_pixels[high_c].g); + hb = to_5(pSrc_pixels[high_c].b); + } + } + + static const int8_t s_adjacent_voxels[16][4] = + { + { 1,0,0, 3 }, // 0 + { 0,1,0, 4 }, // 1 + { 0,0,1, 5 }, // 2 + { -1,0,0, 0 }, // 3 + { 0,-1,0, 1 }, // 4 + { 0,0,-1, 2 }, // 5 + { 1,1,0, 9 }, // 6 + 
{ 1,0,1, 10 }, // 7 + { 0,1,1, 11 }, // 8 + { -1,-1,0, 6 }, // 9 + { -1,0,-1, 7 }, // 10 + { 0,-1,-1, 8 }, // 11 + { -1,1,0, 13 }, // 12 + { 1,-1,0, 12 }, // 13 + { 0,-1,1, 15 }, // 14 + { 0,1,-1, 14 }, // 15 + }; + + // From icbc's high quality mode. + static inline void encode_bc1_endpoint_search(const color32* pSrc_pixels, bool any_black_pixels, + uint32_t flags, bc1_encode_results& results, uint32_t cur_err, const uint8_t* pForce_selectors) + { + int& lr = results.lr, & lg = results.lg, & lb = results.lb, & hr = results.hr, & hg = results.hg, & hb = results.hb; + uint8_t* sels = results.sels; + + int prev_improvement_index = 0, forbidden_direction = -1; + + const int endpoint_search_rounds = (flags & cEncodeBC1EndpointSearchRoundsMask) >> cEncodeBC1EndpointSearchRoundsShift; + for (int i = 0; i < endpoint_search_rounds; i++) + { + assert(s_adjacent_voxels[s_adjacent_voxels[i & 15][3]][3] == (i & 15)); + + if (forbidden_direction == (i & 31)) + continue; + + const int8_t delta[3] = { s_adjacent_voxels[i & 15][0], s_adjacent_voxels[i & 15][1], s_adjacent_voxels[i & 15][2] }; + + int trial_lr = lr, trial_lg = lg, trial_lb = lb, trial_hr = hr, trial_hg = hg, trial_hb = hb; + + if ((i >> 4) & 1) + { + trial_lr = clampi(trial_lr + delta[0], 0, 31); + trial_lg = clampi(trial_lg + delta[1], 0, 63); + trial_lb = clampi(trial_lb + delta[2], 0, 31); + } + else + { + trial_hr = clampi(trial_hr + delta[0], 0, 31); + trial_hg = clampi(trial_hg + delta[1], 0, 63); + trial_hb = clampi(trial_hb + delta[2], 0, 31); + } + + uint8_t trial_sels[16]; + + uint32_t trial_err; + if (results.m_3color) + { + trial_err = bc1_find_sels3_fullerr( + ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)), + pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, pForce_selectors); + } + else + { + trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, 
pForce_selectors); + } + + if (trial_err < cur_err) + { + cur_err = trial_err; + + forbidden_direction = s_adjacent_voxels[i & 15][3] | (i & 16); + + lr = trial_lr, lg = trial_lg, lb = trial_lb, hr = trial_hr, hg = trial_hg, hb = trial_hb; + + memcpy(sels, trial_sels, 16); + + prev_improvement_index = i; + } + + if (i - prev_improvement_index > 32) + break; + } + } + + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try, uint32_t total_orderings_to_try3, const uint8_t* pForce_selectors) + { + assert(g_initialized); + + const color32* pSrc_pixels = (const color32*)pPixels; + bc1_block* pDst_block = static_cast(pDst); + + int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b; + + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; + + uint32_t j; + for (j = 15; j >= 1; --j) + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) + break; + + if (j == 0) + { + encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); + return; + } + + int total_r = fr, total_g = fg, total_b = fb; + + max_r = fr, max_g = fg, max_b = fb; + min_r = fr, min_g = fg, min_b = fb; + + uint32_t grayscale_flag = (fr == fg) && (fr == fb); + uint32_t any_black_pixels = (fr | fg | fb) < 4; + + for (uint32_t i = 1; i < 16; i++) + { + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; + + grayscale_flag &= ((r == g) && (r == b)); + any_black_pixels |= ((r | g | b) < 4); + + max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); + min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); + total_r += r; total_g += g; total_b += b; + } + + avg_r = (total_r + 8) >> 4, avg_g = (total_g + 8) >> 4, avg_b = (total_b + 8) >> 4; + + bc1_encode_results results; + results.m_3color = false; + + uint8_t* sels = results.sels; + int& lr = 
results.lr, & lg = results.lg, & lb = results.lb, & hr = results.hr, & hg = results.hg, & hb = results.hb; + int orig_lr = 0, orig_lg = 0, orig_lb = 0, orig_hr = 0, orig_hg = 0, orig_hb = 0; + + lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; + + const bool needs_block_error = ((flags & (cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use3ColorBlocks | cEncodeBC1UseFullMSEEval | cEncodeBC1EndpointSearchRoundsMask)) != 0) || + (any_black_pixels && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)); + + uint32_t cur_err = UINT32_MAX; + + if (!needs_block_error) + { + assert((flags & cEncodeBC1TryAllInitialEndponts) == 0); + + encode_bc1_pick_initial(pSrc_pixels, flags, grayscale_flag != 0, + min_r, min_g, min_b, max_r, max_g, max_b, + avg_r, avg_g, avg_b, total_r, total_g, total_b, + lr, lg, lb, hr, hg, hb); + + orig_lr = lr, orig_lg = lg, orig_lb = lb, orig_hr = hr, orig_hg = hg, orig_hb = hb; + + bc1_find_sels4_noerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, pForce_selectors); + + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, sels, &xl, &xh, total_r, total_g, total_b)) + { + // All selectors equal - treat it as a solid block which should always be equal or better. + trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
+ } + else + { + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) + break; + + bc1_find_sels4_noerr(pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, sels, pForce_selectors); + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + } // ls_pass + } + else + { + const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; + for (uint32_t round = 0; round < total_rounds; round++) + { + uint32_t modified_flags = flags; + if (round == 1) + { + modified_flags &= ~(cEncodeBC1Use2DLS | cEncodeBC1BoundingBox); + modified_flags |= cEncodeBC1BoundingBox; + } + + int round_lr, round_lg, round_lb, round_hr, round_hg, round_hb; + uint8_t round_sels[16]; + + encode_bc1_pick_initial(pSrc_pixels, modified_flags, grayscale_flag != 0, + min_r, min_g, min_b, max_r, max_g, max_b, + avg_r, avg_g, avg_b, total_r, total_g, total_b, + round_lr, round_lg, round_lb, round_hr, round_hg, round_hb); + + int orig_round_lr = round_lr, orig_round_lg = round_lg, orig_round_lb = round_lb, orig_round_hr = round_hr, orig_round_hg = round_hg, orig_round_hb = round_hb; + + uint32_t round_err = bc1_find_sels4(flags, pSrc_pixels, round_lr, round_lg, round_lb, round_hr, round_hg, round_hb, round_sels, UINT32_MAX, pForce_selectors); + + const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; + for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) + { + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, round_sels, &xl, &xh, total_r, total_g, total_b)) + { + // All selectors equal - treat it as a solid block which should always be equal or better. 
+ trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + + // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. + } + else + { + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + if ((round_lr == trial_lr) && (round_lg == trial_lg) && (round_lb == trial_lb) && (round_hr == trial_hr) && (round_hg == trial_hg) && (round_hb == trial_hb)) + break; + + uint8_t trial_sels[16]; + uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, round_err, pForce_selectors); + + if (trial_err < round_err) + { + round_lr = trial_lr; + round_lg = trial_lg; + round_lb = trial_lb; + + round_hr = trial_hr; + round_hg = trial_hg; + round_hb = trial_hb; + + round_err = trial_err; + memcpy(round_sels, trial_sels, 16); + } + else + break; + + } // ls_pass + + if (round_err <= cur_err) + { + cur_err = round_err; + + lr = round_lr; + lg = round_lg; + lb = round_lb; + hr = round_hr; + hg = round_hg; + hb = round_hb; + + orig_lr = orig_round_lr; + orig_lg = orig_round_lg; + orig_lb = orig_round_lb; + orig_hr = orig_round_hr; + orig_hg = orig_round_hg; + orig_hb = orig_round_hb; + + memcpy(sels, round_sels, 16); + } + + } // round + } + + if ((cur_err) && (flags & cEncodeBC1UseLikelyTotalOrderings)) + { + assert(needs_block_error); + + const uint32_t total_iters = (flags & cEncodeBC1Iterative) ? 
2 : 1; + for (uint32_t iter_index = 0; iter_index < total_iters; iter_index++) + { + const uint32_t orig_err = cur_err; + + hist4 h; + for (uint32_t i = 0; i < 16; i++) + { + assert(sels[i] < 4); + h.m_hist[sels[i]]++; + } + + const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); + + int r0, g0, b0, r3, g3, b3; + r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); + r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); + + int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; + + int dots[16]; + for (uint32_t i = 0; i < 16; i++) + { + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; + int d = 0x1000000 + (r * ar + g * ag + b * ab); + assert(d >= 0); + dots[i] = (d << 4) + i; + } + + std::sort(dots, dots + 16); + + uint32_t r_sum[17], g_sum[17], b_sum[17]; + uint32_t r = 0, g = 0, b = 0; + for (uint32_t i = 0; i < 16; i++) + { + const uint32_t p = dots[i] & 15; + + r_sum[i] = r; + g_sum[i] = g; + b_sum[i] = b; + + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; + } + + r_sum[16] = total_r; + g_sum[16] = total_g; + b_sum[16] = total_b; + + const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : clampi(total_orderings_to_try, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4); + for (uint32_t q = 0; q < q_total; q++) + { + const uint32_t s = (flags & cEncodeBC1Exhaustive) ? 
q : g_best_total_orderings4[orig_total_order_index][q]; + + int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; + + vec3F xl, xh; + + if ((s == TOTAL_ORDER_4_0_16) || (s == TOTAL_ORDER_4_1_16) || (s == TOTAL_ORDER_4_2_16) || (s == TOTAL_ORDER_4_3_16)) + { + trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; + trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; + trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; + + trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; + trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; + trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; + } + else + { + compute_least_squares_endpoints4_rgb(&xl, &xh, total_r, total_g, total_b, + g_selector_factors4[s][0], g_selector_factors4[s][1], g_selector_factors4[s][2], s, r_sum, g_sum, b_sum); + + precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); + } + + uint8_t trial_sels[16]; + + uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err, pForce_selectors); + + if (trial_err < cur_err) + { + cur_err = trial_err; + + lr = trial_lr; + lg = trial_lg; + lb = trial_lb; + + hr = trial_hr; + hg = trial_hg; + hb = trial_hb; + + memcpy(sels, trial_sels, 16); + } + + } // s + + if ((!cur_err) || (cur_err == orig_err)) + break; + + } // iter_index + } + + if (((flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0) && (cur_err)) + { + if (flags & cEncodeBC1Use3ColorBlocks) + { + assert(needs_block_error); + try_3color_block(pSrc_pixels, flags, cur_err, avg_r, avg_g, avg_b, orig_lr, orig_lg, orig_lb, orig_hr, orig_hg, orig_hb, total_r, total_g, total_b, total_orderings_to_try3, results, pForce_selectors); + } + + if ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)) + { + assert(needs_block_error); + try_3color_block_useblack(pSrc_pixels, flags, cur_err, results, pForce_selectors); + } + } + + if ((flags & cEncodeBC1EndpointSearchRoundsMask) && 
(cur_err)) + { + assert(needs_block_error); + + encode_bc1_endpoint_search(pSrc_pixels, any_black_pixels != 0, flags, results, cur_err, pForce_selectors); + } + + if (results.m_3color) + bc1_encode3(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); + else + bc1_encode4(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); + } + + // BC3-5 + + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) + { + assert(g_initialized); + + uint32_t min0_v, max0_v, min1_v, max1_v, min2_v, max2_v, min3_v, max3_v; + + { + min0_v = max0_v = pPixels[0 * stride]; + min1_v = max1_v = pPixels[1 * stride]; + min2_v = max2_v = pPixels[2 * stride]; + min3_v = max3_v = pPixels[3 * stride]; + } + + { + uint32_t v0 = pPixels[4 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[5 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[6 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[7 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); + } + + { + uint32_t v0 = pPixels[8 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[9 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[10 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[11 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); + } + + { + uint32_t v0 = pPixels[12 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); + uint32_t v1 = pPixels[13 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); + uint32_t v2 = pPixels[14 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); + uint32_t v3 = pPixels[15 * stride]; min3_v = std::min(min3_v, v3); max3_v = 
std::max(max3_v, v3); + } + + const uint32_t min_v = minimum(min0_v, min1_v, min2_v, min3_v); + const uint32_t max_v = maximum(max0_v, max1_v, max2_v, max3_v); + + uint8_t* pDst_bytes = static_cast(pDst); + pDst_bytes[0] = (uint8_t)max_v; + pDst_bytes[1] = (uint8_t)min_v; + + if (max_v == min_v) + { + memset(pDst_bytes + 2, 0, 6); + return; + } + + const uint32_t delta = max_v - min_v; + + // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. + const int t0 = delta * 13; + const int t1 = delta * 11; + const int t2 = delta * 9; + const int t3 = delta * 7; + const int t4 = delta * 5; + const int t5 = delta * 3; + const int t6 = delta * 1; + + // BC4 floors in its divisions, which we compensate for with the 4 bias. + // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). + const int bias = 4 - min_v * 14; + + static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; + static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; + static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; + static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; + + uint64_t a0, a1, a2, a3; + { + const int v0 = pPixels[0 * stride] * 14 + bias; + const int v1 = pPixels[1 * stride] * 14 + bias; + const int v2 = pPixels[2 * stride] * 14 + bias; + const int v3 = pPixels[3 * stride] * 14 + bias; + a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; + a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; + a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; + a3 = s_tran3[(v3 >= t0) + (v3 
>= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; + } + + { + const int v0 = pPixels[4 * stride] * 14 + bias; + const int v1 = pPixels[5 * stride] * 14 + bias; + const int v2 = pPixels[6 * stride] * 14 + bias; + const int v3 = pPixels[7 * stride] * 14 + bias; + a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); + a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); + a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); + a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); + } + + { + const int v0 = pPixels[8 * stride] * 14 + bias; + const int v1 = pPixels[9 * stride] * 14 + bias; + const int v2 = pPixels[10 * stride] * 14 + bias; + const int v3 = pPixels[11 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); + a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); + } + + { + const int v0 = pPixels[12 * stride] * 14 + bias; + const int v1 = pPixels[13 * stride] * 14 + bias; + const int v2 = pPixels[14 * stride] * 14 + bias; + const int v3 = pPixels[15 * stride] * 14 + bias; + a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); + a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); + a2 |= 
(((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); + a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); + } + + const uint64_t f = a0 | a1 | a2 | a3; + + pDst_bytes[2] = (uint8_t)f; + pDst_bytes[3] = (uint8_t)(f >> 8U); + pDst_bytes[4] = (uint8_t)(f >> 16U); + pDst_bytes[5] = (uint8_t)(f >> 24U); + pDst_bytes[6] = (uint8_t)(f >> 32U); + pDst_bytes[7] = (uint8_t)(f >> 40U); + } + + uint32_t encode_bc4_hq(void* pDst, const uint8_t* pPixels, uint32_t stride, uint32_t search_rad, uint32_t mode_flag, const uint8_t* pForce_selectors) + { + assert(mode_flag); + + uint8_t* pDst_bytes = static_cast(pDst); + + uint32_t min_val = 255, max_val = 0; + for (uint32_t i = 0; i < 16; i++) + { + uint32_t val = pPixels[i * stride]; + min_val = std::min(val, min_val); + max_val = std::max(val, max_val); + } + + if (min_val == max_val) + { + if (mode_flag & BC4_USE_MODE6_FLAG) + { + pDst_bytes[0] = (uint8_t)min_val; + pDst_bytes[1] = (uint8_t)min_val; + + memset(pDst_bytes + 2, 0, 6); + + assert(!(pDst_bytes[0] > pDst_bytes[1])); + } + else + { + // Use an 8 value encoding + if (min_val > 0) + { + pDst_bytes[0] = (uint8_t)min_val; + pDst_bytes[1] = (uint8_t)min_val - 1; + + memset(pDst_bytes + 2, 0, 6); + } + else + { + static const uint8_t s_const_1_vals[8] = { 1, 0, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24 }; + memcpy(pDst_bytes, s_const_1_vals, 8); + } + + assert(pDst_bytes[0] > pDst_bytes[1]); + } + +#if defined(_DEBUG) || defined(DEBUG) + { + bc4_block* pBlock = (bc4_block*)pDst; + uint8_t pixels[16]; + unpack_bc4(pDst, pixels, 1); + for (uint32_t i = 0; i < 16; i++) + assert(pixels[i] == min_val); + if (mode_flag & BC4_USE_MODE6_FLAG) + { + assert(pBlock->is_alpha6_block()); + } + else + { + assert(!pBlock->is_alpha6_block()); + } + } +#endif + + return 0; + } + + uint32_t best_err = UINT32_MAX; + for (uint32_t mode = 0; mode < 2; mode++) + { + 
if ((mode_flag & (1 << mode)) == 0) + continue; + + for (int lo_delta = -(int)search_rad; lo_delta <= (int)search_rad; lo_delta++) + { + for (int hi_delta = -(int)search_rad; hi_delta <= (int)search_rad; hi_delta++) + { + bc4_block trial_block; + trial_block.m_endpoints[0] = (uint8_t)clamp(max_val + hi_delta, 0, 255); + trial_block.m_endpoints[1] = (uint8_t)clamp(min_val + lo_delta, 0, 255); + + if (trial_block.m_endpoints[0] == trial_block.m_endpoints[1]) + continue; + + if (mode == 0) + { + if (trial_block.is_alpha6_block()) + std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + } + else if (!trial_block.is_alpha6_block()) + std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + + uint8_t block_vals[8]; + trial_block.get_block_values(block_vals, trial_block.m_endpoints[0], trial_block.m_endpoints[1]); + + uint32_t trial_err = 0; + uint8_t trial_sels[16]; + + if (pForce_selectors) + { + memcpy(trial_sels, pForce_selectors, 16); + + for (uint32_t i = 0; i < 16; i++) + trial_err += squarei(block_vals[pForce_selectors[i]] - pPixels[i * stride]); + } + else + { + for (uint32_t i = 0; i < 16; i++) + { + uint32_t best_index_err = UINT32_MAX; + uint32_t best_index = 0; + for (uint32_t j = 0; j < 8; j++) + { + uint32_t err = squarei(block_vals[j] - pPixels[i * stride]); + if (err < best_index_err) + { + best_index_err = err; + best_index = j; + if (!err) + break; + } + } + + trial_err += best_index_err; + if (trial_err >= best_err) + break; + + trial_sels[i] = (uint8_t)best_index; + } // i + } + + if (trial_err < best_err) + { + best_err = trial_err; + + uint64_t sel_vals = 0; + for (uint32_t i = 0; i < 16; i++) + sel_vals |= ((uint64_t)trial_sels[i] << (i * 3)); + + trial_block.m_selectors[0] = (uint8_t)sel_vals; + trial_block.m_selectors[1] = (uint8_t)(sel_vals >> 8); + trial_block.m_selectors[2] = (uint8_t)(sel_vals >> 16); + trial_block.m_selectors[3] = (uint8_t)(sel_vals >> 24); + trial_block.m_selectors[4] = (uint8_t)(sel_vals >> 32); + 
trial_block.m_selectors[5] = (uint8_t)(sel_vals >> 40); + + memcpy(pDst_bytes, &trial_block, sizeof(bc4_block)); + } // if (trial_err < best_err) + + } // hi_delta + + } // lo_delta + + } // mode + + return best_err; + } + + void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try) + { + assert(g_initialized); + + // 3-color blocks are not allowed with BC3 (on most GPU's). + flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks); + + encode_bc4(pDst, pPixels + 3, 4); + encode_bc1(static_cast(pDst) + 8, pPixels, flags, total_orderings_to_try); + } + + void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels) + { + assert(g_initialized); + + encode_bc4(pDst, pPixels + 3, 4); + encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); + } + + void encode_bc3_hq(uint32_t level, void* pDst, const uint8_t* pPixels, uint32_t alpha_search_rad, uint32_t alpha_modes) + { + assert(g_initialized); + + encode_bc4_hq(pDst, pPixels + 3, 4, alpha_search_rad, alpha_modes); + encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); + } + + void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) + { + assert(g_initialized); + + encode_bc4(pDst, pPixels + chan0, stride); + encode_bc4(static_cast(pDst) + 8, pPixels + chan1, stride); + } + + void encode_bc5_hq(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride, uint32_t alpha_search_rad, uint32_t alpha_modes) + { + assert(g_initialized); + + encode_bc4_hq(pDst, pPixels + chan0, stride, alpha_search_rad, alpha_modes); + encode_bc4_hq(static_cast(pDst) + 8, pPixels + chan1, stride, alpha_search_rad, alpha_modes); + } + + bool unpack_bc1_block_colors(const void* pBlock_bits, color32* c, bc1_approx_mode mode) + { + const bc1_block* pBlock = static_cast(pBlock_bits); + + const uint32_t l = pBlock->get_low_color(); + const uint32_t h = pBlock->get_high_color(); + + const int 
cr0 = (l >> 11) & 31; + const int cg0 = (l >> 5) & 63; + const int cb0 = l & 31; + const int r0 = (cr0 << 3) | (cr0 >> 2); + const int g0 = (cg0 << 2) | (cg0 >> 4); + const int b0 = (cb0 << 3) | (cb0 >> 2); + + const int cr1 = (h >> 11) & 31; + const int cg1 = (h >> 5) & 63; + const int cb1 = h & 31; + const int r1 = (cr1 << 3) | (cr1 >> 2); + const int g1 = (cg1 << 2) | (cg1 >> 4); + const int b1 = (cb1 << 3) | (cb1 >> 2); + + bool used_punchthrough = false; + + if (l > h) + { + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + switch (mode) + { + case bc1_approx_mode::cBC1Ideal: + c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + break; + case bc1_approx_mode::cBC1IdealRound4: + c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); + c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); + c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); + break; + case bc1_approx_mode::cBC1AMD: + c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + break; + } + } + else + { + c[0].set_noclamp_rgba(r0, g0, b0, 255); + c[1].set_noclamp_rgba(r1, g1, b1, 255); + switch (mode) + { + case bc1_approx_mode::cBC1Ideal: + case bc1_approx_mode::cBC1IdealRound4: + c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + break; + case bc1_approx_mode::cBC1NVidia: + c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); + break; + case 
bc1_approx_mode::cBC1AMD: + c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + break; + } + + c[3].set_noclamp_rgba(0, 0, 0, 0); + used_punchthrough = true; + } + + return used_punchthrough; + } + + // Returns true if the block uses 3 color punchthrough alpha mode. + bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha, bc1_approx_mode mode) + { + color32* pDst_pixels = static_cast(pPixels); + + static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); + static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); + + const bc1_block* pBlock = static_cast(pBlock_bits); + + color32 c[4]; + const bool used_punchthrough = unpack_bc1_block_colors(pBlock_bits, c, mode); + + if (set_alpha) + { + for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) + { + pDst_pixels[0] = c[pBlock->get_selector(0, y)]; + pDst_pixels[1] = c[pBlock->get_selector(1, y)]; + pDst_pixels[2] = c[pBlock->get_selector(2, y)]; + pDst_pixels[3] = c[pBlock->get_selector(3, y)]; + } + } + else + { + for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) + { + pDst_pixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pDst_pixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pDst_pixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pDst_pixels[3].set_rgb(c[pBlock->get_selector(3, y)]); + } + } + + return used_punchthrough; + } + + void unpack_bc4(const void* pBlock_bits, uint8_t* pPixels, uint32_t stride) + { + static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); + + const bc4_block* pBlock = static_cast(pBlock_bits); + + uint8_t sel_values[8]; + bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); + + const uint64_t selector_bits = pBlock->get_selector_bits(); + + for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) + { + pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; + pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, 
selector_bits)]; + pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; + pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; + } + } + + // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. + bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode) + { + color32* pDst_pixels = static_cast(pPixels); + + bool success = true; + + if (unpack_bc1((const uint8_t*)pBlock_bits + sizeof(bc4_block), pDst_pixels, true, mode)) + success = false; + + unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(color32)); + + return success; + } + + // writes RG + void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) + { + unpack_bc4(pBlock_bits, (uint8_t*)pPixels + chan0, stride); + unpack_bc4((const uint8_t*)pBlock_bits + sizeof(bc4_block), (uint8_t*)pPixels + chan1, stride); + } + +} // namespace rgbcx + + + diff --git a/libkram/bc7enc/rgbcx.h b/libkram/bc7enc/rgbcx.h index 748d39e2..d5680bc0 100644 --- a/libkram/bc7enc/rgbcx.h +++ b/libkram/bc7enc/rgbcx.h @@ -1,7 +1,9 @@ -// rgbcx.h v1.12 -// High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . +// rgbcx.h v1.13 +// High-performance scalar encoders and RDO (Rate Distortion Optimization) post processors for BC1-5. +// Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . 
// // Influential references: +// https://tinyurl.com/y3vxz457 (Ortego and Ramchandran, "Rate-distortion Methods for Image and Video Compression", 1998) // http://sjbrown.co.uk/2006/01/19/dxt-compression-techniques/ // https://github.com/nothings/stb/blob/master/stb_dxt.h // https://gist.github.com/castano/c92c7626f288f9e99e158520b14a61cf @@ -56,6 +58,10 @@ #ifndef RGBCX_INCLUDE_H #define RGBCX_INCLUDE_H +#ifdef _MSC_VER +#pragma warning (disable:4201) //nameless struct/union +#endif + #include #include //#include @@ -87,6 +93,57 @@ namespace rgbcx cBC1IdealRound4 = 3 }; + enum class eNoClamp { cNoClamp }; + static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + + template inline S maximum(S a, S b) { return (a > b) ? a : b; } + template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } + template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + + template inline S minimum(S a, S b) { return (a < b) ? 
a : b; } + template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } + template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + + struct color32 + { + union + { + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + color32() { } + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } + + void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } + + void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } + void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + + void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } + + uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } + uint8_t& operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } + + bool operator== (const color32& rhs) const { return m == rhs.m; } + + void set_rgb(const color32& other) { c[0] = static_cast(other.c[0]); c[1] = static_cast(other.c[1]); c[2] = static_cast(other.c[2]); } + + static color32 comp_min(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); } + static color32 comp_max(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } + }; + // init() MUST be called once before using the BC1 encoder. 
// This function may be called multiple times to change the BC1 approximation mode. // This function initializes global state, so don't call it while other threads inside the encoder. @@ -177,30 +234,41 @@ namespace rgbcx // Note that the 3 color modes won't be used at all until level 5 or higher. // No transparency supported, however if you set use_transparent_texels_for_black to true the encocer will use transparent selectors on very dark/black texels to reduce MSE. const uint32_t MIN_LEVEL = 0, MAX_LEVEL = 18; - void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool use_transparent_texels_for_black); + void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool use_transparent_texels_for_black, const uint8_t* pForce_selectors = nullptr); // Low-level interface for BC1 encoding. // Always returns a 4 color block, unless cEncodeBC1Use3ColorBlocksForBlackPixels or cEncodeBC1Use3ColorBlock flags are specified. // total_orderings_to_try controls the perf. vs. quality tradeoff on 4-color blocks when the cEncodeBC1UseLikelyTotalOrderings flag is used. It must range between [MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4]. // total_orderings_to_try3 controls the perf. vs. quality tradeoff on 3-color bocks when the cEncodeBC1UseLikelyTotalOrderings and the cEncodeBC1Use3ColorBlocks flags are used. Valid range is [0,MAX_TOTAL_ORDERINGS3] (0=disabled). 
- void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY, uint32_t total_orderings_to_try3 = DEFAULT_TOTAL_ORDERINGS_TO_TRY3); - + void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY, uint32_t total_orderings_to_try3 = DEFAULT_TOTAL_ORDERINGS_TO_TRY3, const uint8_t *pForce_selectors = nullptr); + + // Constants used for high quality BC4/BC5 encoding (and alpha of BC3) + const uint32_t BC4_DEFAULT_SEARCH_RAD = 3; + const uint32_t BC4_USE_MODE8_FLAG = 1; + const uint32_t BC4_USE_MODE6_FLAG = 2; + const uint32_t BC4_USE_ALL_MODES = 3; + // Encodes a 4x4 block of RGBA pixels to BC3 format. // There are two encode_bc3() functions. // The first is the recommended function, which accepts a level parameter. // The second is a low-level version that allows fine control over BC1 encoding. void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels); void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags = 0, uint32_t total_orderings_to_try = DEFAULT_TOTAL_ORDERINGS_TO_TRY); - + void encode_bc3_hq(uint32_t level, void* pDst, const uint8_t* pPixels, uint32_t alpha_search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t alpha_modes = BC4_USE_ALL_MODES); + // Encodes a single channel to BC4. // stride is the source pixel stride in bytes. void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride = 4); + uint32_t encode_bc4_hq(void* pDst, const uint8_t* pPixels, uint32_t stride = 4, uint32_t search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t mode_flag = BC4_USE_ALL_MODES, const uint8_t* pForce_selectors = nullptr); // Encodes two channels to BC5. // chan0/chan1 control which channels, stride is the source pixel stride in bytes. 
void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4); + void encode_bc5_hq(void* pDst, const uint8_t* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4, uint32_t alpha_search_rad = BC4_DEFAULT_SEARCH_RAD, uint32_t alpha_modes = BC4_USE_ALL_MODES); // Decompression functions. + + bool unpack_bc1_block_colors(const void* pBlock_bits, color32* c, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); // Returns true if the block uses 3 color punchthrough alpha mode. bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha = true, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); @@ -211,1273 +279,8 @@ namespace rgbcx bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode = bc1_approx_mode::cBC1Ideal); void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0 = 0, uint32_t chan1 = 1, uint32_t stride = 4); -} -#endif // #ifndef RGBCX_INCLUDE_H - -#ifdef RGBCX_IMPLEMENTATION -namespace rgbcx -{ - const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS4 = 969; - - // All total orderings for 16 pixels 2-bit selectors. - // BC1 selector order 0, 2, 3, 1 (i.e. the selectors are reordered into linear order). 
- static uint8_t g_unique_total_orders4[NUM_UNIQUE_TOTAL_ORDERINGS4][4] = - { - {0,8,2,6},{4,3,9,0},{4,8,1,3},{12,0,3,1},{11,3,2,0},{6,4,6,0},{7,5,0,4},{6,0,8,2},{1,0,0,15},{3,0,8,5},{1,1,13,1},{13,1,2,0},{0,14,1,1},{0,15,1,0},{0,13,0,3},{16,0,0,0},{4,3,4,5},{8,6,0,2},{0,10,0,6},{10,0,4,2},{7,2,1,6},{4,7,5,0},{1,4,7,4},{0,14,2,0},{2,7,2,5},{9,0,5,2},{9,2,2,3},{10,0,5,1},{2,3,7,4},{4,9,0,3},{1,5,0,10},{1,1,6,8}, - {6,6,4,0},{11,5,0,0},{11,2,0,3},{4,0,10,2},{2,3,10,1},{1,13,1,1},{0,14,0,2},{2,3,3,8},{12,3,1,0},{14,0,0,2},{9,1,3,3},{6,4,0,6},{1,1,5,9},{5,9,0,2},{2,10,1,3},{12,0,0,4},{4,6,6,0},{0,6,4,6},{3,7,4,2},{0,13,3,0},{3,10,0,3},{10,2,1,3},{1,12,1,2},{2,0,13,1},{11,0,5,0},{12,1,3,0},{6,4,5,1},{10,4,2,0},{3,6,1,6},{7,3,6,0},{10,4,0,2},{10,0,2,4}, - {0,5,9,2},{0,9,3,4},{6,4,2,4},{3,4,7,2},{3,3,5,5},{4,2,9,1},{6,2,8,0},{3,5,3,5},{4,10,1,1},{10,1,3,2},{5,7,0,4},{5,3,7,1},{6,8,1,1},{8,8,0,0},{11,1,0,4},{14,1,0,1},{9,3,2,2},{8,2,1,5},{0,0,2,14},{3,3,9,1},{10,1,5,0},{8,3,1,4},{1,5,8,2},{6,1,9,0},{3,2,1,10},{3,11,1,1},{7,6,3,0},{9,0,3,4},{5,2,5,4},{0,2,3,11},{15,0,0,1},{0,6,6,4}, - {3,4,9,0},{4,7,0,5},{0,4,4,8},{0,13,2,1},{2,4,1,9},{3,2,5,6},{10,6,0,0},{3,5,6,2},{8,0,4,4},{1,3,6,6},{7,7,0,2},{6,1,4,5},{0,11,1,4},{2,2,8,4},{0,1,2,13},{15,0,1,0},{7,2,6,1},{8,1,7,0},{1,8,4,3},{2,13,1,0},{1,0,7,8},{14,2,0,0},{1,8,1,6},{9,3,3,1},{0,0,7,9},{4,4,1,7},{9,0,6,1},{10,2,4,0},{1,7,3,5},{0,3,8,5},{5,2,4,5},{1,2,5,8}, - {0,8,7,1},{10,3,2,1},{12,0,4,0},{2,1,4,9},{5,2,2,7},{1,9,3,3},{15,1,0,0},{6,3,4,3},{9,5,0,2},{1,6,9,0},{6,6,0,4},{13,2,1,0},{5,1,8,2},{0,5,11,0},{7,1,0,8},{1,2,12,1},{0,3,3,10},{7,4,2,3},{5,1,4,6},{7,0,3,6},{3,12,0,1},{3,4,5,4},{1,10,0,5},{7,4,3,2},{10,5,0,1},{13,3,0,0},{2,5,4,5},{3,10,1,2},{5,1,2,8},{14,0,1,1},{1,5,4,6},{1,4,5,6}, - 
{2,3,11,0},{11,0,4,1},{11,2,2,1},{5,3,8,0},{1,3,10,2},{0,1,13,2},{3,1,4,8},{4,2,4,6},{1,5,6,4},{2,1,11,2},{1,2,9,4},{4,7,3,2},{6,2,5,3},{7,2,2,5},{8,1,4,3},{3,2,8,3},{12,1,0,3},{7,8,1,0},{7,0,2,7},{5,10,0,1},{0,2,14,0},{2,9,3,2},{7,0,0,9},{11,1,4,0},{10,4,1,1},{2,2,9,3},{5,7,2,2},{1,3,1,11},{13,2,0,1},{4,2,8,2},{2,3,1,10},{4,2,5,5}, - {7,0,7,2},{10,0,0,6},{0,8,5,3},{4,4,0,8},{12,4,0,0},{0,1,14,1},{8,0,1,7},{5,1,5,5},{11,0,3,2},{0,4,1,11},{0,8,8,0},{0,2,5,9},{7,3,2,4},{7,8,0,1},{1,0,3,12},{7,4,5,0},{1,6,7,2},{7,6,1,2},{9,6,1,0},{12,2,0,2},{4,1,6,5},{4,0,1,11},{8,4,4,0},{13,0,1,2},{8,6,2,0},{4,12,0,0},{2,7,5,2},{2,0,5,9},{5,4,5,2},{3,8,5,0},{7,3,3,3},{4,4,8,0}, - {2,1,3,10},{5,0,1,10},{6,4,3,3},{4,9,1,2},{1,4,0,11},{11,3,1,1},{4,0,12,0},{13,0,0,3},{6,1,6,3},{9,0,4,3},{8,0,0,8},{8,4,0,4},{0,12,1,3},{0,4,10,2},{3,4,8,1},{1,3,8,4},{9,2,5,0},{5,7,4,0},{1,0,11,4},{4,10,0,2},{1,3,12,0},{6,9,0,1},{5,0,9,2},{5,9,2,0},{13,1,0,2},{9,3,4,0},{9,4,0,3},{3,1,12,0},{2,4,3,7},{1,2,13,0},{2,2,4,8},{6,8,0,2}, - {9,2,1,4},{9,5,1,1},{2,0,4,10},{5,4,0,7},{0,0,6,10},{1,2,0,13},{4,7,2,3},{6,5,5,0},{3,3,1,9},{1,6,1,8},{12,2,1,1},{4,4,5,3},{1,0,6,9},{0,6,10,0},{4,8,3,1},{4,3,2,7},{2,1,7,6},{1,9,1,5},{3,1,3,9},{8,7,1,0},{1,2,3,10},{14,1,1,0},{5,4,4,3},{3,7,0,6},{7,4,1,4},{3,7,5,1},{1,1,0,14},{0,10,3,3},{0,4,3,9},{1,7,7,1},{2,0,10,4},{5,8,0,3}, - {6,7,3,0},{0,8,4,4},{5,7,3,1},{7,9,0,0},{7,6,2,1},{0,4,5,7},{6,3,5,2},{1,2,1,12},{5,2,0,9},{8,5,0,3},{4,6,1,5},{1,1,7,7},{10,5,1,0},{1,2,8,5},{1,8,2,5},{5,1,0,10},{6,9,1,0},{13,0,2,1},{8,3,5,0},{6,3,6,1},{2,11,3,0},{3,7,3,3},{1,5,2,8},{7,5,2,2},{0,6,7,3},{13,1,1,1},{5,3,4,4},{7,2,7,0},{5,8,3,0},{3,13,0,0},{0,7,9,0},{8,0,3,5}, - 
{1,3,7,5},{4,0,2,10},{12,0,1,3},{1,7,6,2},{3,9,0,4},{7,2,0,7},{0,1,7,8},{2,1,8,5},{0,13,1,2},{0,8,1,7},{5,0,11,0},{5,6,2,3},{0,3,0,13},{2,3,4,7},{5,6,3,2},{4,2,10,0},{3,3,7,3},{7,2,5,2},{1,1,11,3},{12,3,0,1},{5,1,1,9},{1,15,0,0},{9,7,0,0},{9,1,2,4},{0,7,3,6},{3,0,13,0},{3,0,11,2},{0,6,5,5},{8,2,2,4},{6,10,0,0},{4,8,4,0},{0,0,3,13}, - {0,4,12,0},{7,1,6,2},{3,5,0,8},{8,0,6,2},{6,2,3,5},{2,10,0,4},{4,11,0,1},{6,1,5,4},{5,1,3,7},{0,11,3,2},{4,6,0,6},{2,6,0,8},{3,1,7,5},{2,14,0,0},{2,9,2,3},{0,3,4,9},{11,0,1,4},{13,0,3,0},{8,3,0,5},{0,5,3,8},{5,11,0,0},{0,1,4,11},{2,1,9,4},{3,4,4,5},{7,1,2,6},{12,2,2,0},{9,4,1,2},{6,0,2,8},{4,6,2,4},{11,2,3,0},{3,2,2,9},{10,3,1,2}, - {1,1,2,12},{0,5,2,9},{0,1,11,4},{6,2,4,4},{2,8,2,4},{0,9,4,3},{11,0,2,3},{0,2,11,3},{6,0,7,3},{0,3,6,7},{4,5,5,2},{1,2,6,7},{7,5,1,3},{9,0,2,5},{2,6,4,4},{4,1,9,2},{4,8,2,2},{1,12,3,0},{0,9,6,1},{0,10,6,0},{3,1,5,7},{2,13,0,1},{2,2,1,11},{3,6,0,7},{5,6,5,0},{5,5,4,2},{4,0,3,9},{3,4,1,8},{0,11,2,3},{2,12,1,1},{7,1,3,5},{7,0,9,0}, - {8,0,8,0},{1,0,2,13},{3,3,10,0},{2,4,4,6},{2,3,8,3},{1,10,5,0},{7,3,0,6},{2,9,0,5},{1,4,6,5},{6,6,3,1},{5,6,0,5},{6,3,0,7},{3,10,2,1},{2,5,5,4},{3,8,4,1},{1,14,0,1},{10,3,3,0},{3,5,7,1},{1,1,3,11},{2,4,0,10},{9,3,1,3},{5,10,1,0},{3,0,6,7},{3,1,9,3},{11,2,1,2},{5,3,3,5},{0,5,1,10},{4,1,11,0},{10,2,0,4},{7,6,0,3},{2,7,0,7},{4,2,2,8}, - {6,1,7,2},{4,9,2,1},{0,0,8,8},{3,7,2,4},{9,6,0,1},{0,12,4,0},{6,7,1,2},{0,7,2,7},{1,0,10,5},{0,0,14,2},{2,7,3,4},{5,0,0,11},{7,7,1,1},{6,2,7,1},{4,5,3,4},{3,5,1,7},{5,9,1,1},{6,2,1,7},{3,2,0,11},{0,11,0,5},{3,11,2,0},{10,1,4,1},{7,0,4,5},{11,4,0,1},{10,3,0,3},{0,2,4,10},{0,15,0,1},{0,11,5,0},{6,7,2,1},{1,12,2,1},{4,1,3,8},{1,0,13,2}, - 
{1,8,5,2},{7,0,1,8},{3,12,1,0},{9,2,4,1},{1,7,4,4},{11,4,1,0},{4,3,8,1},{2,8,4,2},{1,11,3,1},{1,1,4,10},{4,10,2,0},{8,2,5,1},{1,0,9,6},{5,3,2,6},{0,9,7,0},{10,2,2,2},{5,8,1,2},{8,7,0,1},{0,3,12,1},{1,0,1,14},{4,8,0,4},{3,8,0,5},{4,6,5,1},{0,9,5,2},{10,2,3,1},{2,3,9,2},{1,0,12,3},{11,3,0,2},{4,5,2,5},{0,2,12,2},{9,1,0,6},{9,2,0,5}, - {1,2,7,6},{4,7,4,1},{0,12,2,2},{0,0,0,16},{2,8,3,3},{3,6,2,5},{0,6,3,7},{7,5,4,0},{3,3,3,7},{3,3,0,10},{5,0,6,5},{0,0,10,6},{8,5,3,0},{8,1,5,2},{6,0,9,1},{11,1,2,2},{2,11,2,1},{9,5,2,0},{3,0,4,9},{2,2,12,0},{2,6,6,2},{2,1,13,0},{6,0,5,5},{2,0,14,0},{2,11,1,2},{4,4,7,1},{2,0,11,3},{3,1,1,11},{2,9,4,1},{3,7,6,0},{14,0,2,0},{1,10,4,1}, - {8,0,7,1},{3,6,5,2},{0,3,11,2},{2,5,6,3},{11,1,3,1},{6,5,3,2},{3,8,1,4},{0,2,7,7},{2,10,2,2},{1,6,2,7},{11,0,0,5},{12,1,1,2},{12,1,2,1},{0,7,1,8},{0,3,9,4},{0,2,1,13},{7,1,4,4},{10,1,0,5},{4,0,8,4},{5,2,7,2},{0,2,0,14},{4,3,7,2},{2,7,1,6},{1,2,2,11},{6,3,3,4},{1,14,1,0},{2,4,6,4},{5,3,6,2},{5,3,5,3},{8,4,1,3},{1,3,0,12},{3,5,2,6}, - {1,8,7,0},{0,7,4,5},{2,1,6,7},{4,11,1,0},{7,2,4,3},{6,1,3,6},{4,5,4,3},{2,11,0,3},{1,5,7,3},{12,0,2,2},{5,0,4,7},{1,13,0,2},{7,7,2,0},{4,1,7,4},{4,5,0,7},{5,0,5,6},{6,5,4,1},{2,4,2,8},{1,10,1,4},{6,3,1,6},{3,3,8,2},{0,7,7,2},{4,4,2,6},{1,1,8,6},{1,12,0,3},{2,1,12,1},{1,9,2,4},{1,11,0,4},{2,5,2,7},{10,0,3,3},{4,6,3,3},{3,7,1,5}, - {1,9,0,6},{7,1,7,1},{1,6,5,4},{9,2,3,2},{6,2,2,6},{2,2,2,10},{8,3,3,2},{0,1,8,7},{2,0,8,6},{0,3,1,12},{9,4,2,1},{9,4,3,0},{6,2,6,2},{1,8,0,7},{5,1,10,0},{0,5,5,6},{8,2,4,2},{2,3,2,9},{6,0,3,7},{2,2,6,6},{2,6,2,6},{1,13,2,0},{9,3,0,4},{7,3,5,1},{6,5,2,3},{5,2,6,3},{2,0,12,2},{5,7,1,3},{8,1,3,4},{3,1,10,2},{1,0,15,0},{0,8,0,8}, - {5,0,7,4},{4,4,6,2},{0,1,0,15},{10,0,1,5},{7,3,4,2},{4,9,3,0},{2,5,7,2},{3,4,2,7},{8,3,2,3},{5,1,6,4},{0,10,2,4},{6,6,1,3},{6,0,0,10},{4,4,3,5},{1,3,9,3},{7,5,3,1},{3,0,7,6},{1,8,6,1},{4,3,0,9},{3,11,0,2},{6,0,6,4},{0,1,3,12},{0,4,2,10},{5,5,6,0},{4,1,4,7},{8,1,6,1},{5,6,4,1},{8,4,2,2},{4,3,1,8},{3,0,2,11},{1,11,4,0},{0,8,3,5}, 
- {5,1,7,3},{7,0,8,1},{4,3,5,4},{4,6,4,2},{3,2,4,7},{1,6,3,6},{0,7,8,1},{3,0,1,12},{9,1,4,2},{7,4,0,5},{1,7,0,8},{5,4,1,6},{9,1,5,1},{1,1,9,5},{4,1,1,10},{5,3,0,8},{2,2,5,7},{4,0,0,12},{9,0,7,0},{3,4,0,9},{0,2,6,8},{8,2,0,6},{3,2,6,5},{4,2,6,4},{3,6,4,3},{2,8,6,0},{5,0,3,8},{0,4,0,12},{0,16,0,0},{0,9,2,5},{4,0,11,1},{1,6,4,5}, - {0,1,6,9},{3,4,6,3},{3,0,10,3},{7,0,6,3},{1,4,9,2},{1,5,3,7},{8,5,2,1},{0,12,0,4},{7,2,3,4},{0,5,6,5},{11,1,1,3},{6,5,0,5},{2,1,5,8},{1,4,11,0},{9,1,1,5},{0,0,13,3},{5,8,2,1},{2,12,0,2},{3,3,6,4},{4,1,10,1},{4,0,5,7},{8,1,0,7},{5,1,9,1},{4,3,3,6},{0,2,2,12},{6,3,2,5},{0,0,12,4},{1,5,1,9},{2,6,5,3},{3,6,3,4},{2,12,2,0},{1,6,8,1}, - {10,1,1,4},{1,3,4,8},{7,4,4,1},{1,11,1,3},{1,2,10,3},{3,9,3,1},{8,5,1,2},{2,10,4,0},{4,2,0,10},{2,7,6,1},{8,2,3,3},{1,5,5,5},{3,1,0,12},{3,10,3,0},{8,0,5,3},{0,6,8,2},{0,3,13,0},{0,0,16,0},{1,9,4,2},{4,1,8,3},{1,6,6,3},{0,10,5,1},{0,1,12,3},{4,0,6,6},{3,8,3,2},{0,5,4,7},{1,0,14,1},{0,4,6,6},{3,9,1,3},{3,5,8,0},{3,6,6,1},{5,4,7,0}, - {3,0,12,1},{8,6,1,1},{2,9,5,0},{6,1,1,8},{4,1,2,9},{3,9,4,0},{5,2,9,0},{0,12,3,1},{1,4,10,1},{4,0,7,5},{3,1,2,10},{5,4,2,5},{5,5,5,1},{4,2,3,7},{1,7,5,3},{2,8,0,6},{8,1,2,5},{3,8,2,3},{6,1,2,7},{3,9,2,2},{9,0,0,7},{0,8,6,2},{8,4,3,1},{0,2,8,6},{6,5,1,4},{2,3,5,6},{2,10,3,1},{0,7,0,9},{4,2,7,3},{2,4,8,2},{7,1,1,7},{2,4,7,3}, - {2,4,10,0},{0,1,10,5},{4,7,1,4},{0,10,4,2},{9,0,1,6},{1,9,6,0},{3,3,4,6},{4,5,7,0},{5,5,2,4},{2,8,1,5},{2,3,6,5},{0,1,1,14},{3,2,3,8},{10,1,2,3},{9,1,6,0},{3,4,3,6},{2,2,0,12},{0,0,9,7},{4,0,9,3},{7,0,5,4},{4,5,6,1},{2,5,1,8},{2,5,9,0},{3,5,4,4},{1,3,11,1},{7,1,5,3},{3,2,7,4},{1,4,2,9},{1,11,2,2},{2,2,3,9},{5,0,10,1},{3,2,11,0}, - {1,10,3,2},{8,3,4,1},{3,6,7,0},{0,7,5,4},{1,3,3,9},{2,2,10,2},{1,9,5,1},{0,5,0,11},{3,0,3,10},{0,4,8,4},{2,7,7,0},{2,0,2,12},{1,2,11,2},{6,3,7,0},{0,6,2,8},{0,10,1,5},{0,9,0,7},{6,4,4,2},{6,0,1,9},{1,5,10,0},{5,4,6,1},{5,5,3,3},{0,0,4,12},{0,3,2,11},{1,4,1,10},{3,0,9,4},{5,5,0,6},{1,7,8,0},{2,0,3,11},{6,4,1,5},{10,0,6,0},{0,6,0,10}, - 
{0,4,11,1},{3,1,6,6},{2,5,8,1},{0,2,10,4},{3,1,11,1},{6,6,2,2},{1,1,10,4},{2,1,2,11},{6,1,8,1},{0,2,13,1},{0,7,6,3},{6,8,2,0},{3,0,0,13},{4,4,4,4},{6,2,0,8},{7,3,1,5},{0,11,4,1},{6,7,0,3},{2,6,3,5},{5,2,1,8},{7,1,8,0},{5,5,1,5},{1,8,3,4},{8,2,6,0},{6,0,10,0},{5,6,1,4},{1,4,4,7},{2,7,4,3},{1,4,8,3},{5,4,3,4},{1,10,2,3},{2,9,1,4}, - {2,2,11,1},{2,5,0,9},{0,0,1,15},{0,0,11,5},{0,4,7,5},{0,1,15,0},{2,1,0,13},{0,3,10,3},{8,0,2,6},{3,3,2,8},{3,5,5,3},{1,7,1,7},{1,3,2,10},{4,0,4,8},{2,0,9,5},{1,1,1,13},{2,2,7,5},{2,1,10,3},{4,2,1,9},{4,3,6,3},{1,3,5,7},{2,5,3,6},{1,0,8,7},{5,0,2,9},{2,8,5,1},{1,6,0,9},{0,0,5,11},{0,4,9,3},{2,0,7,7},{1,7,2,6},{2,1,1,12},{2,4,9,1}, - {0,5,7,4},{6,0,4,6},{3,2,10,1},{0,6,1,9},{2,6,1,7},{0,5,8,3},{4,1,0,11},{1,2,4,9},{4,1,5,6},{6,1,0,9},{1,4,3,8},{4,5,1,6},{1,0,5,10},{5,3,1,7},{0,9,1,6},{2,0,1,13},{2,0,6,8},{8,1,1,6},{1,5,9,1},{0,6,9,1},{0,3,5,8},{0,2,9,5},{5,2,8,1},{1,1,14,0},{3,2,9,2},{5,0,8,3},{0,5,10,1},{5,2,3,6},{2,6,7,1},{2,3,0,11},{0,1,9,6},{1,0,4,11}, - {3,0,5,8},{0,0,15,1},{2,4,5,5},{0,3,7,6},{2,0,0,14},{1,1,12,2},{2,6,8,0},{3,1,8,4},{0,1,5,10} - }; - // All total orderings for 16 pixels [0,2] 2-bit selectors. - // BC1 selector order: 0, 1, 2 - // Note this is different from g_unique_total_orders4[], which reorders the selectors into linear order. 
- const uint32_t NUM_UNIQUE_TOTAL_ORDERINGS3 = 153; - static uint8_t g_unique_total_orders3[NUM_UNIQUE_TOTAL_ORDERINGS3][3] = - { - {6,0,10},{3,6,7},{3,0,13},{13,3,0},{12,4,0},{9,1,6},{2,13,1},{4,7,5},{7,5,4},{9,6,1},{7,4,5},{8,6,2},{16,0,0},{10,6,0},{2,7,7}, - {0,0,16},{0,3,13},{1,15,0},{0,2,14},{1,4,11},{15,1,0},{1,12,3},{9,2,5},{14,1,1},{8,2,6},{3,3,10},{4,2,10},{14,0,2},{0,14,2},{1,7,8},{6,6,4}, - {11,5,0},{6,4,6},{11,3,2},{4,3,9},{7,1,8},{10,4,2},{12,1,3},{11,0,5},{9,3,4},{1,0,15},{9,0,7},{2,6,8},{12,2,2},{6,2,8},{6,8,2},{15,0,1}, - {4,8,4},{0,4,12},{8,5,3},{5,9,2},{11,2,3},{12,3,1},{6,3,7},{1,1,14},{2,9,5},{1,8,7},{4,10,2},{7,7,2},{13,1,2},{0,15,1},{3,2,11},{7,0,9}, - {4,4,8},{3,8,5},{0,5,11},{13,2,1},{1,10,5},{4,11,1},{3,10,3},{5,10,1},{10,2,4},{0,6,10},{14,2,0},{11,4,1},{3,12,1},{1,13,2},{1,5,10},{5,11,0}, - {12,0,4},{8,1,7},{6,10,0},{3,13,0},{7,2,7},{0,7,9},{5,8,3},{0,12,4},{11,1,4},{13,0,3},{0,16,0},{5,7,4},{10,3,3},{10,0,6},{0,13,3},{4,6,6}, - {2,8,6},{2,5,9},{7,8,1},{2,1,13},{2,0,14},{7,3,6},{5,1,10},{3,11,2},{5,4,7},{8,3,5},{10,5,1},{6,9,1},{1,3,12},{4,5,7},{2,2,12},{4,1,11}, - {0,8,8},{4,12,0},{6,5,5},{8,7,1},{5,5,6},{3,7,6},{7,9,0},{4,9,3},{0,10,6},{8,0,8},{5,3,8},{10,1,5},{6,1,9},{7,6,3},{9,5,2},{0,1,15}, - {9,7,0},{2,14,0},{3,4,9},{8,4,4},{9,4,3},{0,9,7},{1,9,6},{3,9,4},{5,2,9},{2,3,11},{5,6,5},{1,14,1},{6,7,3},{2,4,10},{2,12,2},{8,8,0}, - {2,10,4},{4,0,12},{0,11,5},{2,11,3},{1,11,4},{3,5,8},{5,0,11},{3,1,12},{1,2,13},{1,6,9} - }; - - // For each total ordering, this table indicates which other total orderings are likely to improve quality using a least squares pass. Each array is sorted by usefulness. 
- static uint16_t g_best_total_orderings4[NUM_UNIQUE_TOTAL_ORDERINGS4][MAX_TOTAL_ORDERINGS4] = - { -#if RGBCX_USE_SMALLER_TABLES - { 202,120,13,318,15,23,403,450,5,51,260,128,77,21,33,494,515,523,4,141,269,1,2,700,137,49,48,102,7,64,753,82 }, - { 13,141,23,217,115,51,77,2,64,21,0,4,5,317,137,269,202,33,318,7,291,352,9,10,3,180,32,6,365,102,341,349 }, - { 29,58,262,1,52,74,6,171,5,287,151,334,27,500,75,26,331,223,53,635,220,19,50,45,46,17,14,396,163,409,324,70 }, - { 40,51,33,453,14,23,62,56,12,196,730,475,153,99,403,775,117,130,585,34,4,17,162,11,139,57,102,38,108,47,123,440 }, - { 33,23,51,13,102,64,202,128,12,40,15,196,153,10,1,2,77,99,141,0,515,5,117,3,120,403,700,165,22,14,269,453 }, - { 13,23,51,4,77,141,202,33,115,64,32,128,0,11,177,40,15,102,2,217,7,137,269,21,90,59,515,1,180,403,22,6 }, - { 26,235,19,47,648,624,78,145,27,112,122,64,444,6,630,453,25,42,65,130,711,85,390,113,416,108,665,29,730,138,644,95 }, - { 64,141,352,751,217,247,237,437,177,269,86,954,947,875,32,318,95,77,304,92,597,180,232,291,128,864,349,588,372,202,312,1 }, - { 642,898,180,638,901,341,82,197,10,951,15,515,165,762,700,253,811,753,752,365,143,479,244,569,8,110,351,873,55,31,499,116 }, - { 221,23,51,125,438,254,13,21,39,49,308,656,0,115,530,159,158,401,30,166,912,386,165,688,518,9,105,627,424,22,421,33 }, - { 143,31,1,44,197,8,180,125,116,55,13,498,23,341,638,242,93,15,2,141,0,901,752,115,36,206,165,479,338,365,515,762 }, - { 12,23,51,13,14,15,37,99,515,38,700,117,2,196,134,153,753,64,54,33,128,120,21,0,328,5,139,82,453,719,457,1 }, - { 13,15,23,515,961,700,457,753,51,115,4,165,197,2,38,569,1,474,0,37,99,719,5,12,629,14,11,3,33,77,64,10 }, - { 15,515,700,753,1,0,2,4,3,23,134,12,961,5,10,197,11,33,82,120,457,51,165,7,6,341,217,21,77,9,40,180 }, - { 13,51,23,457,719,961,730,401,165,453,0,117,386,15,134,1,758,153,12,54,515,99,11,2,700,5,753,4,308,33,6,899 }, - { 134,898,82,117,13,33,77,102,23,260,341,351,120,901,197,153,961,111,196,110,180,457,854,10,450,8,165,40,4,115,0,365 }, - { 
60,18,126,167,35,16,191,71,24,92,121,271,68,107,212,146,118,150,199,7,21,1,9,575,727,5,566,48,0,132,108,273 }, - { 62,136,129,123,128,41,162,17,249,211,214,789,618,710,38,678,248,507,57,64,152,269,119,3,177,183,597,106,4,179,216,90 }, - { 403,523,51,475,494,453,817,899,202,23,450,13,421,120,102,730,33,128,4,1,805,5,7,153,757,260,318,196,77,457,326,65 }, - { 4,59,3,62,12,33,56,193,27,21,102,17,40,77,76,84,32,0,6,123,119,177,128,11,18,611,605,25,13,51,73,210 }, - { 43,20,319,422,414,945,0,7,819,61,5,376,325,173,804,904,470,693,97,707,14,49,22,104,147,107,95,32,426,1,330,577 }, - { 13,23,51,2,0,115,4,141,217,33,10,77,1,15,64,180,3,515,7,6,22,102,11,5,40,9,165,700,202,197,317,341 }, - { 28,49,0,105,1,24,65,159,35,55,95,239,16,2,109,7,9,14,170,320,347,168,424,158,10,301,124,5,67,21,64,36 }, - { 15,515,700,753,0,1,13,2,117,4,12,10,5,165,457,3,9,134,11,7,6,51,77,64,961,82,33,197,14,341,120,141 }, - { 7,71,14,149,97,18,60,16,150,92,398,189,140,124,24,273,35,2,69,302,154,68,0,336,517,43,66,28,118,251,230,1 }, - { 4,102,33,77,40,59,11,624,210,12,128,342,5,503,91,139,64,32,25,494,202,678,416,0,403,275,21,450,196,318,523,177 }, - { 25,19,42,6,122,813,256,235,85,26,436,53,297,573,680,390,445,63,27,416,80,233,65,73,389,283,45,605,194,17,250,343 }, - { 402,102,202,128,33,300,403,23,12,77,40,21,342,117,483,99,25,494,6,4,63,32,84,569,139,757,475,318,19,26,196,134 }, - { 158,9,0,109,39,49,65,22,35,168,55,24,68,124,159,16,185,344,333,154,254,272,175,289,1,577,95,28,105,810,30,169 }, - { 197,180,115,237,498,165,2,5,287,546,400,3,61,34,509,13,297,80,341,52,45,186,58,881,23,873,468,176,64,17,311,250 }, - { 120,968,373,260,704,110,450,202,137,318,77,95,269,326,217,717,661,652,851,349,93,1,518,98,827,291,21,177,82,33,848,719 }, - { 44,116,144,268,434,489,367,384,98,127,918,93,948,31,206,940,855,0,203,137,9,22,617,141,332,105,393,492,959,282,299,131 }, - { 13,77,23,33,51,0,64,141,102,4,2,115,1,6,202,15,10,128,269,7,177,180,3,40,22,11,515,217,117,318,700,137 }, - { 
15,515,700,753,4,11,141,40,165,23,64,180,13,202,32,3,51,125,5,197,21,128,0,93,77,1,120,82,269,117,110,59 }, - { 176,231,585,62,34,14,412,161,56,236,527,57,17,3,51,202,4,23,369,283,128,13,472,440,84,361,136,457,381,130,719,53 }, - { 9,0,180,217,237,101,141,352,88,100,230,64,175,317,115,498,68,39,30,1,702,83,213,36,365,208,752,13,252,321,952,546 }, - { 28,9,22,1,49,0,109,39,83,95,86,30,13,105,128,55,141,168,158,67,31,159,208,12,96,5,185,2,160,64,137,23 }, - { 72,4,38,12,51,89,477,11,57,76,401,308,23,474,99,148,413,179,59,13,431,152,54,569,17,3,205,629,197,421,405,15 }, - { 457,13,23,961,15,51,515,700,165,12,753,629,11,1,719,117,0,3,2,37,569,197,40,328,33,5,153,134,99,64,38,196 }, - { 254,100,310,9,30,1,39,625,166,265,190,0,272,557,131,731,31,98,578,688,404,93,101,88,49,21,127,264,44,36,252,478 }, - { 51,23,12,13,15,128,99,120,10,202,515,153,64,82,700,33,165,2,5,117,403,1,141,0,3,196,37,453,753,197,260,93 }, - { 38,99,542,139,453,117,196,23,457,13,328,111,37,134,961,11,12,51,40,775,587,401,474,54,153,477,41,629,33,475,14,277 }, - { 6,85,25,233,343,91,26,63,138,29,19,65,283,4,81,235,42,122,605,64,648,256,174,370,74,389,718,59,45,194,445,416 }, - { 49,5,97,20,197,21,18,193,0,64,408,729,173,350,43,422,165,7,14,104,61,32,509,713,523,102,120,95,125,397,35,232 }, - { 144,116,268,434,384,489,367,206,93,855,940,44,98,332,617,127,959,911,137,282,203,31,22,219,141,9,131,276,417,0,1,120 }, - { 17,106,64,62,32,255,136,292,476,162,129,241,123,141,41,237,720,214,209,352,519,211,186,148,752,247,507,90,21,77,197,119 }, - { 2,29,52,50,5,58,14,6,27,1,366,357,45,53,17,19,171,151,26,181,133,38,218,764,287,583,61,113,3,487,600,281 }, - { 130,59,196,412,381,730,711,236,77,210,202,402,453,99,401,108,361,803,291,283,153,4,57,51,128,183,14,719,503,117,23,11 }, - { 13,23,51,141,77,4,33,64,115,0,217,10,180,202,2,102,11,9,15,165,40,21,128,352,22,7,197,3,317,515,269,1 }, - { 23,13,202,51,120,15,21,5,141,1,128,269,137,515,64,102,125,48,98,33,260,523,318,93,700,165,450,77,2,12,403,82 }, - { 
1,2,14,46,29,67,38,52,5,171,58,24,103,69,96,70,83,181,54,75,163,223,16,45,112,309,155,0,186,35,18,108 }, - { 15,515,700,753,13,0,1,2,153,5,23,10,117,3,9,7,134,165,12,6,341,33,4,14,77,457,115,21,719,180,217,82 }, - { 197,165,509,13,391,180,308,115,23,546,5,498,2,29,3,401,901,61,34,80,14,457,250,569,237,873,38,297,45,15,468,386 }, - { 19,73,27,250,200,714,444,472,26,53,34,17,813,322,283,390,128,297,78,123,432,14,436,136,106,690,57,122,389,80,503,3 }, - { 3,17,21,45,62,32,38,12,155,14,2,328,5,99,401,536,828,13,227,488,106,51,719,119,540,76,165,221,115,629,209,41 }, - { 115,341,873,197,365,13,901,180,569,752,317,1,10,498,143,634,261,0,509,15,943,237,44,31,116,601,165,127,282,23,141,64 }, - { 453,51,23,403,33,421,475,102,15,153,196,515,13,700,117,523,12,40,753,21,4,134,0,494,670,899,22,801,730,10,11,401 }, - { 23,13,51,33,12,117,153,134,453,196,15,99,515,40,14,700,128,102,11,753,77,64,403,202,0,401,475,37,65,2,3,38 }, - { 2,7,5,14,70,1,29,61,52,45,6,112,66,16,21,32,592,46,38,135,87,58,186,315,290,128,113,0,64,48,227,23 }, - { 33,23,102,51,128,13,64,202,141,1,77,10,153,40,196,117,2,3,0,5,15,269,403,12,137,134,318,165,120,6,453,99 }, - { 16,92,7,20,43,35,126,71,60,14,107,18,68,97,0,121,279,149,24,246,191,48,118,575,55,140,362,783,230,150,375,566 }, - { 13,23,4,33,77,64,51,102,141,128,32,10,0,202,40,115,59,22,90,11,177,21,291,6,7,318,180,117,137,2,95,165 }, - { 507,162,129,41,4,211,62,38,123,59,57,248,183,130,99,11,3,361,202,17,402,556,266,305,803,210,128,184,152,136,313,117 }, - { 643,123,193,650,802,18,25,389,718,256,65,289,84,91,619,511,415,90,235,63,57,510,324,216,862,102,6,183,108,397,217,736 }, - { 13,23,15,1,515,51,0,2,700,5,753,165,141,115,12,3,4,180,21,197,457,7,6,10,120,9,33,202,77,32,8,11 }, - { 23,51,13,453,64,403,12,21,5,202,128,475,165,141,523,95,125,115,3,1,4,730,120,32,2,494,180,719,457,197,450,401 }, - { 204,74,135,66,6,174,192,7,138,172,85,353,348,580,280,97,95,500,29,64,426,32,87,889,65,81,25,2,52,43,568,673 }, - { 
35,0,68,69,24,9,1,16,65,103,149,133,18,114,28,50,83,2,189,7,46,14,101,336,175,124,251,55,71,218,38,238 }, - { 16,101,0,118,9,18,24,68,35,154,71,124,60,212,191,520,55,806,694,167,28,39,364,375,1,346,252,65,604,302,22,21 }, - { 0,9,16,35,1,24,68,18,65,21,103,67,13,149,28,189,71,23,101,238,114,7,335,133,486,141,22,212,48,50,30,118 }, - { 13,202,23,77,33,51,128,5,21,141,115,32,102,64,4,0,318,269,10,15,291,2,494,177,11,217,3,515,22,137,6,700 }, - { 16,92,60,35,7,18,24,68,150,149,14,71,0,375,97,126,118,107,230,191,246,273,140,55,175,653,9,575,2,28,566,517 }, - { 76,90,21,179,316,148,205,32,464,288,184,257,245,1,89,2,460,57,152,45,38,358,645,5,12,449,350,48,37,17,4,14 }, - { 19,27,26,813,80,297,17,495,436,53,73,200,4,378,250,59,106,25,45,128,361,42,113,469,122,390,77,40,736,6,11,136 }, - { 6,26,235,138,19,145,112,70,331,262,25,42,52,624,27,453,122,47,500,78,648,85,29,2,630,632,409,113,50,226,108,75 }, - { 7,16,14,24,92,35,18,2,46,9,60,140,0,87,50,5,54,13,12,38,171,23,126,21,58,64,1,70,128,71,220,163 }, - { 90,205,257,184,32,179,460,5,245,45,2,288,769,524,57,21,152,229,17,1,497,4,292,59,619,452,432,76,476,11,266,14 }, - { 15,515,700,753,4,5,11,141,13,1,33,3,0,128,202,23,180,21,2,64,269,32,117,134,120,40,102,318,153,17,137,352 }, - { 47,130,711,108,453,412,730,196,390,283,78,27,51,183,381,236,128,200,719,14,153,472,503,34,59,250,3,4,57,803,123,432 }, - { 12,277,51,474,111,153,23,99,13,37,961,94,629,542,569,431,79,139,38,134,117,453,33,188,196,40,115,15,11,157,401,515 }, - { 17,495,469,106,26,378,80,27,161,483,19,742,527,436,383,862,73,136,53,814,297,6,119,84,62,56,25,3,209,611,4,128 }, - { 81,681,636,91,0,750,370,104,718,138,18,693,173,784,29,397,348,74,192,673,174,65,6,207,64,280,306,52,671,32,355,319 }, - { 15,515,700,753,33,77,4,102,115,117,40,13,1,153,134,11,5,217,23,196,2,21,3,317,32,365,0,341,291,59,12,51 }, - { 0,9,28,35,68,1,65,67,101,39,69,175,16,238,13,22,96,124,18,24,251,30,55,12,23,2,50,141,114,5,154,103 }, - { 
23,33,77,13,117,40,11,102,64,4,51,403,153,453,10,0,196,134,128,65,12,291,86,99,95,59,15,141,202,180,137,719 }, - { 214,90,289,6,874,64,25,65,235,42,751,249,256,312,194,85,746,875,174,32,525,288,519,835,247,348,233,544,217,524,437,352 }, - { 1,22,2,0,36,67,28,5,49,95,12,50,168,83,105,55,7,9,14,194,103,23,114,21,584,46,10,13,38,69,208,159 }, - { 269,141,13,202,33,180,318,77,291,137,102,352,128,23,349,51,31,217,372,317,125,197,44,21,11,5,901,1,18,0,4,494 }, - { 435,144,274,88,203,418,30,1,190,410,96,778,100,530,521,326,466,795,686,166,960,321,382,264,367,822,131,31,692,9,213,93 }, - { 76,72,90,21,37,179,12,205,32,428,148,38,308,405,4,413,57,184,749,245,316,221,54,645,288,1,152,155,464,257,2,14 }, - { 77,33,64,102,13,141,23,2,40,1,51,10,0,115,6,180,202,128,4,3,177,269,15,7,22,165,291,14,217,318,137,11 }, - { 397,81,4,32,65,788,693,804,681,11,249,21,91,64,690,494,3,0,422,56,348,725,194,123,23,59,523,319,61,510,95,90 }, - { 60,126,16,7,92,121,314,246,35,107,150,132,14,146,24,18,199,298,232,71,359,140,672,97,392,649,5,423,95,21,22,388 }, - { 15,515,141,217,115,700,13,23,120,317,753,180,33,260,110,137,341,51,1,365,4,77,64,202,0,40,36,352,197,269,10,21 }, - { 111,134,117,474,23,13,961,12,569,431,37,15,51,115,515,700,277,99,753,38,197,405,457,4,72,94,629,45,11,89,54,148 }, - { 23,13,51,5,1,15,2,21,12,202,141,0,515,165,120,32,4,64,700,3,115,197,269,125,753,7,9,128,6,180,453,403 }, - { 13,141,4,23,5,2,115,217,202,51,180,137,269,352,77,1,317,3,21,318,0,15,9,64,10,197,11,341,33,515,752,7 }, - { 165,125,197,13,391,21,23,558,48,380,97,120,298,33,14,426,66,115,32,386,900,180,6,98,357,237,326,509,51,278,221,457 }, - { 120,82,15,260,515,1,351,77,450,700,13,21,141,23,753,202,217,93,110,33,51,854,5,128,326,102,137,180,817,48,269,352 }, - { 23,13,15,51,515,700,961,753,0,457,1,2,4,115,10,453,569,5,33,165,11,719,14,40,64,197,3,21,474,629,38,401 }, - { 264,166,39,30,9,100,435,254,93,921,190,363,1,625,411,382,897,656,203,478,404,812,438,110,473,88,18,691,156,141,274,272 }, - { 
9,0,252,100,166,39,101,265,364,68,88,329,520,18,419,676,118,167,404,604,16,1,21,30,212,158,553,49,382,274,48,13 }, - { 15,515,700,753,4,11,141,5,3,13,202,1,180,21,2,165,269,23,40,64,0,318,12,32,128,51,77,117,523,197,120,457 }, - { 24,1,2,69,35,16,67,18,14,50,0,46,68,9,38,7,133,71,83,149,28,108,189,218,65,114,238,29,75,54,5,96 }, - { 90,289,214,64,874,13,77,712,66,751,4,23,51,192,32,0,202,194,312,177,33,65,234,104,875,288,59,5,835,416,102,95 }, - { 0,9,49,127,98,31,301,28,371,159,1,395,512,737,158,761,916,623,16,44,242,39,170,18,293,105,24,272,101,22,23,385 }, - { 17,62,136,214,123,129,32,292,119,209,710,106,141,162,128,64,45,4,77,249,11,618,211,3,207,130,519,183,38,177,21,269 }, - { 5,107,581,356,279,32,441,362,493,660,13,298,0,534,49,147,21,22,132,121,97,423,7,590,259,683,14,786,126,508,60,246 }, - { 51,13,15,730,453,23,515,719,386,457,12,700,403,475,899,1,6,523,753,421,99,401,165,33,2,19,361,5,0,670,120,27 }, - { 49,28,9,159,272,22,254,131,158,327,95,105,0,39,35,168,347,286,374,55,65,627,424,912,68,578,1,24,239,175,688,169 }, - { 15,515,700,33,753,4,77,141,341,317,1,10,13,180,102,22,40,117,115,365,5,901,23,197,134,11,217,351,64,82,21,137 }, - { 134,15,13,515,23,700,12,753,51,474,37,961,197,10,457,569,4,0,99,2,115,38,165,153,94,3,139,11,1,82,33,5 }, - { 7,2,20,58,5,14,128,66,6,29,32,43,21,52,16,38,631,61,74,97,46,135,113,25,202,192,13,0,884,45,112,87 }, - { 77,13,33,202,23,128,102,4,141,342,117,0,269,318,134,22,11,21,32,153,403,291,49,64,137,51,40,15,494,5,196,98 }, - { 2,1,14,6,46,38,29,65,5,36,67,0,103,7,22,86,133,50,108,208,52,83,24,323,283,69,28,18,10,25,23,75 }, - { 15,515,700,753,1,5,4,2,3,13,0,11,180,341,12,33,10,197,134,365,77,23,21,901,6,117,165,7,37,32,17,102 }, - { 203,268,206,93,417,940,31,8,120,137,44,499,959,473,202,692,728,559,0,260,10,326,141,564,817,127,341,1,450,22,110,23 }, - { 15,82,515,120,700,0,10,753,33,8,64,165,110,31,260,93,13,197,23,22,40,4,351,44,77,9,11,153,102,51,1,196 }, - { 
60,0,16,7,14,43,20,71,28,10,2,22,154,18,13,24,92,1,51,576,35,615,805,925,68,126,124,149,97,64,23,55 }, - { 19,6,26,80,5,84,27,17,25,2,504,129,45,240,56,123,4,119,618,1,76,106,64,51,14,3,128,65,32,710,0,42 }, - { 15,515,700,753,13,4,77,23,33,51,0,5,8,10,11,31,44,1,82,22,202,64,110,102,93,21,291,40,141,180,9,49 }, - { 195,98,271,223,132,167,146,407,1,360,121,834,393,591,212,199,293,259,522,107,354,147,156,191,807,590,48,18,125,16,765,541 }, - { 128,202,77,210,402,318,33,102,6,40,403,29,342,269,196,757,99,139,2,111,42,4,494,117,275,300,13,12,678,0,177,122 }, - { 13,33,23,40,51,102,4,117,77,64,134,0,128,153,202,196,453,11,15,12,1,22,403,141,59,14,10,475,515,65,700,95 }, - { 7,16,14,24,18,2,28,0,92,71,1,22,6,35,60,20,168,10,154,118,5,302,124,69,97,109,703,158,420,12,149,66 }, - { 15,1,515,23,0,13,700,2,51,753,180,5,120,165,197,21,115,4,33,9,141,7,12,6,3,457,386,202,260,523,8,31 }, - { 60,107,121,132,146,126,199,279,150,92,16,649,441,35,955,7,21,0,423,5,18,195,598,298,493,356,32,653,22,362,953,10 }, - { 31,44,98,276,284,299,116,935,9,201,0,131,39,127,144,662,1,137,371,492,567,489,93,254,49,268,22,28,30,293,434,737 }, - { 13,15,23,515,700,0,1,51,753,4,2,10,77,202,5,115,3,165,197,457,9,12,11,961,33,120,22,141,180,7,6,40 }, - { 123,162,184,257,17,183,229,130,129,3,84,136,99,152,556,383,57,497,12,205,4,62,56,452,80,266,128,14,40,119,27,106 }, - { 196,33,117,40,153,23,134,13,51,102,453,0,15,475,12,14,515,2,22,700,4,21,753,64,401,670,730,1,9,11,10,99 }, - { 224,219,187,131,258,385,442,871,836,31,98,908,44,574,127,944,137,839,116,36,613,1,254,39,926,160,829,96,93,371,860,827 }, - { 121,195,156,132,146,360,590,407,786,522,883,591,259,929,626,941,150,687,5,55,296,379,467,178,586,465,279,21,1,13,60,354 }, - { 2,1,14,29,6,5,46,52,38,19,114,75,26,65,108,96,25,50,36,70,103,309,17,236,218,74,12,86,0,3,10,112 }, - { 15,515,82,700,120,753,10,0,8,197,260,165,351,64,13,110,117,93,31,1,9,33,22,23,457,44,450,77,102,898,40,49 }, - { 
7,66,97,2,172,74,226,52,29,135,192,232,43,324,92,5,38,20,222,14,6,568,87,107,353,620,580,16,138,174,448,32 }, - { 62,129,123,162,136,249,618,183,507,57,4,152,17,59,11,184,117,77,3,128,211,41,130,205,12,40,33,106,64,229,38,313 }, - { 1,13,15,2,4,515,23,0,3,115,700,5,51,77,341,141,753,180,33,217,197,202,901,6,21,165,11,365,318,317,10,102 }, - { 6,26,235,19,145,47,112,78,64,27,453,95,29,444,25,624,85,108,648,70,32,130,74,42,711,630,632,138,65,122,113,730 }, - { 23,51,12,15,13,99,515,153,117,10,700,37,120,82,165,2,753,64,128,0,403,3,5,1,134,197,453,31,202,457,110,21 }, - { 16,24,18,71,64,35,92,7,246,146,9,108,60,118,199,5,140,2,267,0,230,830,32,133,1,68,50,330,247,563,36,12 }, - { 15,515,700,753,0,1,13,2,23,3,4,217,51,5,115,8,9,180,341,10,7,6,317,77,33,372,901,197,365,11,120,165 }, - { 234,639,178,202,77,142,5,455,450,49,416,0,147,427,198,21,315,329,13,318,325,557,120,344,113,259,22,128,61,105,23,494 }, - { 1,31,36,44,141,180,55,2,64,22,98,116,13,352,0,115,10,127,5,164,253,498,237,165,341,197,4,86,15,170,125,23 }, - { 15,120,13,141,23,260,217,515,1,77,51,110,180,700,317,82,269,137,115,202,21,753,64,5,351,291,0,450,352,93,36,326 }, - { 26,6,112,396,19,145,25,122,648,287,42,74,624,222,416,45,138,66,644,151,113,651,29,573,64,280,445,27,525,85,70,58 }, - { 156,360,5,146,121,21,271,522,354,132,49,13,18,195,16,340,60,591,446,586,727,0,107,407,167,48,1,463,199,566,32,23 }, - { 5,61,49,147,178,612,660,120,21,182,23,427,259,683,33,4,77,70,13,3,376,98,64,0,481,344,48,595,291,263,141,51 }, - { 89,79,468,179,358,205,94,405,115,498,72,180,365,431,37,111,341,734,188,317,482,217,11,4,245,152,413,216,12,474,490,752 }, - { 24,16,35,68,18,71,7,92,0,108,9,14,118,101,336,175,375,302,28,124,154,55,149,60,398,1,65,2,140,273,345,230 }, - { 51,730,421,801,453,386,23,523,13,475,719,401,670,365,899,403,115,457,758,165,33,494,450,6,423,805,629,56,569,514,958,388 }, - { 113,45,6,311,29,2,151,614,145,491,112,80,5,27,61,74,315,66,209,631,19,25,58,17,73,26,1,243,70,64,611,287 }, - { 
4,339,188,471,11,59,79,12,377,94,99,33,77,102,51,111,37,152,13,961,474,542,40,342,3,23,128,403,202,177,184,57 }, - { 15,4,515,11,700,33,82,40,0,120,753,10,8,110,13,93,23,165,77,260,64,31,22,51,44,102,351,1,125,9,197,21 }, - { 16,24,18,0,35,68,28,71,124,118,60,7,9,55,14,92,109,101,419,175,22,252,154,375,149,302,158,346,2,49,1,126 }, - { 17,45,227,21,106,3,2,243,209,5,48,32,221,62,207,50,29,186,290,270,263,52,14,496,400,119,46,255,54,430,38,721 }, - { 340,354,586,658,156,195,698,668,1,296,9,18,883,363,447,379,303,98,411,13,31,163,51,5,371,48,919,846,121,21,360,70 }, - { 277,153,111,12,23,51,474,99,38,37,139,117,41,457,79,453,542,13,11,33,134,157,629,188,961,14,196,401,102,569,15,94 }, - { 0,18,16,159,49,24,9,105,35,68,7,28,22,1,60,344,55,101,109,2,14,158,13,23,71,118,455,286,272,424,5,327 }, - { 0,105,9,49,16,18,158,28,518,24,101,320,1,68,170,301,272,127,7,286,35,890,109,39,159,98,21,344,31,55,371,23 }, - { 141,1,180,15,13,2,365,217,515,352,317,115,341,0,4,5,269,700,23,21,3,752,197,77,753,51,31,901,10,202,8,64 }, - { 4,23,51,33,19,17,102,153,485,880,40,403,196,26,300,453,27,117,78,0,12,200,47,5,11,14,342,99,53,77,475,2 }, - { 62,184,56,440,130,229,183,3,556,152,99,162,12,266,17,548,136,57,305,161,123,14,452,4,383,403,257,34,40,84,33,139 }, - { 13,23,77,141,64,202,33,51,269,115,0,102,21,4,217,128,5,32,318,137,291,9,15,2,180,10,3,317,177,515,7,6 }, - { 1,22,36,105,170,0,86,2,31,28,239,64,55,5,10,98,9,44,127,95,654,67,301,143,13,12,49,23,320,141,83,21 }, - { 15,515,700,753,0,1,13,2,23,901,5,8,51,82,9,180,457,4,7,12,3,6,10,120,341,141,22,898,197,351,115,260 }, - { 1,39,274,98,100,265,190,30,438,310,166,223,88,96,909,31,264,625,530,9,382,812,21,252,593,0,254,539,44,131,23,778 }, - { 18,212,167,118,363,1,447,411,146,60,271,16,781,121,647,9,621,562,21,478,664,68,815,5,354,98,48,101,24,446,777,463 }, - { 24,28,22,0,7,1,2,16,14,65,35,49,158,95,109,159,55,105,10,18,124,9,67,5,239,149,12,289,108,68,21,424 }, - { 
105,22,131,272,286,98,55,239,1,31,320,9,127,327,36,185,28,374,86,219,0,64,187,44,578,164,224,913,535,115,601,13 }, - { 22,31,28,301,127,98,44,0,105,1,512,395,9,293,109,299,95,338,239,125,242,116,36,320,55,841,900,685,599,23,13,763 }, - { 2,1,58,29,5,14,52,46,186,334,45,155,151,50,400,75,38,69,502,61,48,227,223,7,163,17,262,67,549,21,70,113 }, - { 7,107,135,232,97,14,2,92,66,16,172,192,278,387,298,356,38,35,448,52,46,43,60,29,20,126,324,526,357,359,64,5 }, - { 20,43,104,426,173,7,560,414,707,784,319,81,0,861,422,819,38,74,715,52,376,97,879,32,330,22,49,64,66,95,192,526 }, - { 104,74,636,66,204,0,355,81,222,25,29,319,145,784,20,65,90,4,174,194,7,64,6,746,138,173,750,715,91,43,192,32 }, - { 0,9,101,35,68,39,65,28,252,124,67,154,364,336,100,166,30,1,289,55,149,346,16,114,158,88,439,24,429,22,570,194 }, - { 57,14,4,231,236,585,176,59,369,23,361,13,719,51,300,342,12,457,56,3,62,38,202,401,34,46,2,322,11,215,210,507 }, - { 1,2,15,3,141,0,515,5,33,700,13,64,77,180,6,128,753,10,4,269,102,202,11,7,134,197,352,120,117,318,12,291 }, - { 5,1,21,202,13,32,48,23,0,61,259,22,494,120,70,49,51,18,137,128,465,12,178,115,2,453,403,141,58,3,90,450 }, - { 141,205,4,72,59,79,245,11,352,94,152,76,247,216,21,188,452,217,497,12,89,37,111,339,588,77,64,875,864,115,358,464 }, - { 15,515,700,753,0,1,2,13,5,4,23,3,8,341,365,51,115,10,120,457,6,141,77,197,31,7,165,9,202,450,961,260 }, - { 5,2,50,14,58,38,171,46,29,1,45,186,17,52,155,218,48,281,61,487,54,36,67,21,328,334,151,227,760,114,400,133 }, - { 457,120,70,125,318,64,23,48,795,291,202,761,751,415,77,846,269,758,21,237,96,260,391,165,87,1,128,5,221,13,137,763 }, - { 13,23,51,33,4,40,117,102,453,64,153,196,0,77,15,11,12,475,1,65,134,10,515,22,21,14,700,59,403,141,2,753 }, - { 229,152,57,266,452,381,432,12,313,184,99,471,17,4,62,339,157,3,129,59,128,11,369,37,77,38,40,123,5,497,188,257 }, - { 49,28,109,22,159,9,272,95,105,131,55,35,254,168,39,327,169,0,1,286,175,374,347,158,420,67,36,194,312,424,627,346 }, - { 
5,2,61,29,45,58,80,311,1,17,209,227,52,243,106,869,454,151,592,496,48,334,14,155,6,186,46,171,75,21,255,667 }, - { 244,44,110,141,260,30,269,352,839,131,574,228,373,276,1,406,219,717,217,137,253,224,120,93,36,31,567,116,661,187,341,88 }, - { 12,99,79,139,11,453,196,51,277,474,111,23,542,37,94,188,33,13,401,775,40,961,313,102,4,339,153,485,629,134,300,431 }, - { 16,35,9,0,68,24,149,69,67,18,1,114,65,230,71,7,103,133,50,167,212,118,101,191,140,64,399,28,124,283,55,565 }, - { 88,30,274,435,131,613,190,100,93,829,166,1,187,795,530,127,382,957,960,160,31,137,466,264,39,800,406,254,28,473,521,219 }, - { 167,16,18,118,212,24,60,71,101,68,191,9,375,411,363,35,0,1,589,199,302,21,447,55,146,126,92,271,647,121,562,48 }, - { 64,141,86,177,77,128,147,597,304,95,269,102,275,4,352,49,120,5,372,194,465,13,588,237,947,216,202,180,612,751,107,534 }, - { 18,65,90,403,523,289,240,214,194,102,701,475,202,217,283,862,389,51,33,0,494,421,453,817,84,64,847,899,352,13,23,437 }, - { 13,51,23,202,5,12,21,128,15,115,0,1,141,120,64,32,4,2,515,403,165,457,3,10,700,99,453,318,719,450,308,401 }, - { 98,223,393,31,1,271,834,791,167,44,202,64,93,697,5,116,77,125,450,446,212,18,541,293,51,120,195,132,284,13,807,765 }, - { 15,515,700,753,4,11,23,13,40,51,82,165,0,110,93,33,141,64,120,5,10,77,3,102,180,32,202,125,8,197,31,21 }, - { 15,515,700,753,0,1,13,2,901,23,5,341,3,51,82,8,4,180,961,9,115,10,12,6,898,7,351,141,134,22,31,120 }, - { 234,416,77,5,315,639,325,202,147,198,113,49,450,61,455,142,0,21,22,342,329,494,178,58,102,427,318,230,13,120,43,470 }, - { 60,146,16,18,156,126,121,271,199,360,132,24,167,0,640,10,71,522,21,92,5,340,107,354,118,150,22,195,446,35,28,212 }, - { 4,361,11,14,56,368,377,161,27,12,300,77,59,200,17,554,202,33,40,494,495,21,210,80,757,25,128,23,19,38,444,53 }, - { 141,82,217,351,15,352,120,1,180,260,515,64,854,36,700,317,752,372,13,269,77,753,922,21,349,23,202,110,93,137,51,373 }, - { 
15,515,700,753,77,13,0,1,23,33,102,2,51,4,3,5,291,217,10,9,450,120,341,7,317,6,11,117,115,8,260,180 }, - { 15,515,120,13,700,23,77,141,1,260,0,753,180,51,137,202,115,365,110,291,217,5,128,9,21,341,197,269,2,450,317,165 }, - { 174,6,348,85,138,74,280,204,66,233,192,355,289,65,81,580,636,353,25,91,104,343,673,214,64,95,42,712,792,32,194,90 }, - { 152,497,452,59,4,216,11,79,94,77,128,188,269,339,588,33,76,529,318,32,141,471,12,202,111,21,5,51,37,90,72,177 }, - { 417,499,10,141,253,244,110,559,8,564,180,260,728,120,352,638,642,341,951,206,143,752,901,93,137,661,922,373,44,31,811,197 }, - { 13,77,23,33,4,51,0,102,128,59,141,40,64,115,177,10,137,22,202,2,7,11,90,1,117,180,269,14,49,6,134,3 }, - { 1,2,22,0,36,5,67,50,14,28,12,86,38,46,83,168,194,65,103,114,49,7,10,95,21,69,23,24,128,51,55,13 }, - { 17,106,119,207,255,306,742,378,84,62,136,45,3,5,240,80,61,56,209,383,311,790,655,32,2,440,76,151,58,29,179,263 }, - { 3,128,1,141,2,202,33,5,64,15,0,515,102,13,269,10,700,180,134,51,120,6,77,318,23,137,17,117,753,197,82,153 }, - { 514,38,377,328,11,57,41,248,880,266,556,4,152,361,471,757,485,403,305,102,3,211,313,99,457,130,12,14,157,40,23,54 }, - { 68,0,167,101,9,118,264,520,16,18,21,478,562,1,124,212,100,936,664,777,191,88,806,154,48,24,759,604,35,252,265,65 }, - { 230,689,699,213,466,352,217,831,30,443,418,144,854,201,840,855,1,251,203,317,530,957,96,93,822,539,36,752,351,137,83,800 }, - { 33,77,102,117,15,82,13,134,23,64,0,515,120,153,51,4,40,128,700,260,202,141,196,22,753,11,351,10,1,326,95,269 }, - { 11,40,33,51,117,13,542,328,14,134,38,153,23,12,485,231,102,54,775,37,3,377,111,139,211,4,457,403,369,475,99,719 }, - { 33,64,77,128,141,2,1,202,102,13,23,117,0,15,3,153,51,134,10,40,6,5,515,269,137,180,318,165,700,7,196,753 }, - { 15,515,700,753,4,1,5,11,13,21,33,180,93,141,64,2,23,77,82,3,0,102,32,40,352,341,10,197,98,110,117,901 }, - { 1,2,14,67,50,46,38,24,103,83,0,5,36,28,29,133,114,96,65,52,18,75,54,108,22,7,238,58,160,9,361,69 }, - { 
258,201,276,137,160,860,116,261,295,843,567,144,131,44,187,268,943,219,284,31,202,935,141,98,662,203,127,96,36,93,224,1 }, - { 7,2,14,16,46,87,75,52,92,278,29,38,140,70,1,5,35,294,24,262,135,69,171,172,58,409,112,60,50,66,97,12 }, - { 13,23,0,2,51,1,33,4,115,10,15,141,77,3,5,180,217,515,9,7,64,11,700,6,102,40,197,22,317,753,165,202 }, - { 74,145,6,66,25,204,42,29,222,337,138,26,7,525,192,174,746,287,544,135,415,2,609,632,112,64,87,0,85,45,712,396 }, - { 77,33,102,15,217,13,23,141,202,515,51,700,291,4,269,753,317,180,21,64,318,115,128,0,275,2,352,196,3,5,137,11 }, - { 187,219,258,871,44,442,160,574,137,224,908,116,839,131,36,926,276,201,93,228,202,860,31,613,144,531,406,1,902,30,190,318 }, - { 1,372,141,5,21,77,225,744,96,30,23,349,13,291,269,284,69,442,459,144,303,839,217,622,160,330,260,48,120,410,189,352 }, - { 66,222,2,74,29,87,135,6,7,145,52,25,294,337,226,172,138,331,42,70,97,112,26,1,632,192,43,5,415,609,461,353 }, - { 45,17,106,209,5,2,21,29,48,207,3,186,243,155,255,263,454,119,400,496,270,14,290,62,425,1,171,32,659,52,38,56 }, - { 93,88,141,120,30,213,260,373,100,717,459,82,110,1,166,450,180,321,217,372,36,269,131,225,22,352,326,466,473,187,244,410 }, - { 266,57,152,381,313,471,12,229,99,369,339,62,157,3,4,37,77,38,188,17,11,162,40,184,129,59,475,775,128,452,403,453 }, - { 217,352,317,141,752,15,180,515,372,365,700,341,753,349,77,21,291,1,115,244,64,120,13,98,269,82,5,498,864,351,23,144 }, - { 14,514,369,102,403,377,51,719,880,153,23,13,457,11,485,4,401,12,328,453,33,40,117,57,629,38,730,236,134,670,361,961 }, - { 107,7,172,14,92,135,2,359,60,314,46,16,126,278,232,150,279,32,38,392,298,5,35,97,24,192,259,288,330,52,356,312 }, - { 0,4,25,13,59,90,65,23,26,19,18,12,5,216,91,51,389,33,77,11,22,85,27,81,21,177,746,45,42,194,37,123 }, - { 5,49,315,202,416,77,455,639,450,21,197,137,350,13,408,0,329,318,494,344,61,402,64,509,347,120,113,48,95,713,308,401 }, - { 
130,47,381,390,59,90,200,214,289,6,65,472,29,64,874,648,50,751,624,26,52,32,4,194,875,714,85,249,247,33,881,19 }, - { 51,23,453,13,719,12,457,165,37,730,99,4,386,197,401,17,11,2,3,15,5,961,475,6,515,64,54,700,32,115,0,403 }, - { 15,515,1,13,700,2,23,0,753,5,3,180,51,4,165,12,141,21,197,457,7,115,6,9,352,10,120,202,8,341,11,77 }, - { 0,9,1,67,35,28,68,16,24,65,18,69,50,114,103,12,22,13,5,101,2,96,23,83,149,21,39,55,7,175,433,124 }, - { 28,105,22,0,1,320,170,9,49,301,109,95,127,31,98,55,65,35,2,24,168,159,36,713,16,740,13,338,21,44,512,23 }, - { 13,77,4,51,23,33,102,202,128,59,40,0,64,141,117,403,115,11,15,318,153,269,22,515,475,134,10,494,177,1,90,210 }, - { 13,23,0,51,77,33,2,141,4,10,1,64,115,102,3,6,22,15,217,11,180,7,40,515,165,202,177,9,269,128,700,5 }, - { 456,116,492,8,949,268,867,391,203,51,499,13,719,386,31,791,457,918,125,10,23,93,479,685,417,0,22,338,506,551,870,730 }, - { 17,237,45,180,106,62,32,64,115,41,136,498,255,21,197,129,241,13,3,227,23,352,165,752,350,365,449,155,4,546,476,38 }, - { 1,15,180,515,0,2,341,700,901,352,4,141,13,3,752,5,753,217,317,115,365,23,197,21,51,165,31,6,269,202,77,7 }, - { 205,141,216,269,497,4,588,76,59,152,128,452,79,77,875,11,72,94,188,217,352,12,247,37,90,64,32,1,474,23,947,372 }, - { 64,247,217,237,317,180,752,115,349,141,498,13,437,304,23,372,352,164,579,291,33,864,177,197,0,490,72,10,482,77,269,51 }, - { 2,1,0,13,15,141,3,77,5,515,64,33,23,180,6,700,4,117,217,7,10,11,102,165,753,197,115,134,40,352,12,269 }, - { 11,40,38,328,33,542,12,313,41,339,23,157,377,117,369,51,471,99,775,485,13,305,457,57,14,475,37,248,4,54,188,719 }, - { 33,77,102,40,13,23,0,51,4,128,64,202,117,141,22,196,153,10,134,15,59,269,1,137,65,11,403,318,453,86,515,177 }, - { 472,80,34,250,495,161,17,14,469,176,128,4,389,106,283,436,216,527,3,297,483,177,53,56,231,194,119,84,719,57,255,59 }, - { 317,352,180,141,217,752,115,341,365,244,1,269,202,901,253,15,21,498,372,4,137,515,13,2,700,318,5,197,23,143,753,349 }, - { 
9,39,101,18,265,100,333,520,252,16,0,329,593,1,553,364,68,167,310,30,121,254,118,158,363,166,60,604,272,24,286,404 }, - { 15,515,1,180,700,901,0,2,753,341,752,4,3,13,115,365,317,5,23,197,141,217,165,352,6,22,36,9,137,51,7,10 }, - { 131,39,9,829,166,613,578,827,1,30,716,254,100,98,31,224,0,406,228,310,616,219,44,846,127,190,938,96,265,371,856,438 }, - { 17,64,62,106,141,751,136,292,32,129,352,41,38,476,86,128,214,237,5,177,123,209,217,45,269,954,162,710,180,3,90,4 }, - { 25,42,235,65,650,736,605,6,630,85,123,343,233,256,26,122,63,389,141,249,416,444,368,194,19,108,138,174,90,0,544,511 }, - { 184,229,152,57,266,432,497,452,17,381,619,257,313,12,4,205,59,3,99,471,157,128,5,129,339,369,77,11,32,45,202,2 }, - { 137,202,160,860,141,30,93,567,36,276,295,261,131,39,9,964,201,843,1,98,800,318,116,22,943,187,10,219,206,44,269,535 }, - { 0,493,125,64,49,9,279,10,35,18,93,55,293,31,14,13,194,165,325,48,22,132,21,107,98,389,44,581,342,259,174,137 }, - { 15,515,700,753,4,33,13,77,23,5,51,32,102,40,93,11,349,141,21,8,82,202,64,31,110,10,117,0,1,44,3,318 }, - { 110,253,854,811,352,141,244,951,180,642,661,384,498,143,752,317,911,10,269,206,559,351,261,120,902,533,922,959,365,160,332,217 }, - { 2,29,70,1,75,52,6,220,26,112,145,331,74,163,19,69,38,324,46,58,14,5,25,21,278,223,50,307,66,7,67,409 }, - { 13,23,77,33,51,4,64,141,115,102,0,2,128,177,40,11,202,10,6,180,7,15,269,1,32,217,59,22,291,3,137,515 }, - { 340,897,691,478,658,264,914,382,100,812,363,1,724,156,166,698,88,521,39,404,682,447,296,96,303,411,30,909,9,274,656,772 }, - { 9,18,310,101,265,159,326,120,105,158,33,363,77,195,51,55,13,39,354,132,23,7,28,639,16,137,98,1,252,272,709,49 }, - { 57,313,471,12,99,369,157,339,266,152,38,37,475,453,328,775,11,40,59,188,77,514,401,403,342,4,139,33,377,51,229,14 }, - { 16,7,24,14,35,140,60,92,18,69,71,2,189,1,46,230,108,388,150,38,21,172,278,67,246,267,50,309,236,135,451,0 }, - { 
206,417,93,940,959,473,499,203,8,137,559,728,31,202,44,120,450,141,10,260,116,564,22,326,269,318,268,244,0,1,253,638 }, - { 15,515,700,753,1,0,13,2,23,4,3,51,5,217,7,77,341,115,8,9,10,33,6,180,317,349,291,120,11,165,457,901 }, - { 1,2,5,14,48,21,290,32,50,45,38,46,263,207,155,72,76,29,17,408,425,171,89,52,7,0,292,449,3,227,513,428 }, - { 121,132,354,167,271,223,146,98,18,463,1,668,446,195,407,60,212,447,781,48,360,363,411,522,156,393,807,9,21,16,293,13 }, - { 131,578,105,371,219,224,716,616,187,49,9,254,737,159,385,98,258,127,272,761,0,916,623,910,28,286,39,31,22,518,924,242 }, - { 302,467,97,6,273,1,24,484,124,51,36,18,2,398,453,421,523,69,7,23,13,403,386,150,66,0,298,65,426,165,22,158 }, - { 30,190,530,88,1,100,778,539,625,274,382,410,96,731,960,39,795,321,9,131,264,144,840,748,44,166,669,957,36,31,435,228 }, - { 141,1,2,128,64,33,15,202,3,0,180,5,13,77,515,134,269,102,197,700,10,137,318,6,120,165,753,352,4,82,23,117 }, - { 44,201,567,116,131,224,295,662,489,268,219,31,434,144,187,276,110,384,93,261,699,137,36,442,120,1,613,30,228,64,141,244 }, - { 12,15,51,23,515,37,99,13,700,0,10,117,753,38,165,82,134,120,11,453,197,64,115,569,1,629,401,22,457,474,110,153 }, - { 7,135,2,92,172,14,66,140,38,52,97,46,29,74,16,324,278,226,6,87,1,571,262,5,357,232,35,380,69,314,24,330 }, - { 125,386,23,963,949,60,51,391,165,221,13,197,118,21,719,193,541,421,517,150,393,7,401,453,308,5,791,551,326,558,48,173 }, - { 6,85,42,25,138,222,174,235,280,256,525,289,26,214,64,746,90,32,544,65,204,19,66,337,355,95,348,415,74,29,5,312 }, - { 1,14,5,50,2,67,24,0,46,69,48,21,58,103,16,12,18,38,54,96,83,7,502,45,36,181,35,9,430,28,10,155 }, - { 811,351,642,180,951,752,110,638,253,10,82,352,197,341,365,564,499,854,873,55,9,417,282,901,244,22,559,143,206,141,28,898 }, - { 23,13,51,15,12,453,403,165,4,515,115,719,475,457,700,523,2,21,0,99,202,197,14,5,386,753,128,401,37,308,33,117 }, - { 120,13,23,77,141,1,15,93,217,82,260,51,137,202,110,515,21,180,165,5,128,102,64,351,291,700,269,352,326,203,177,0 
}, - { 1,5,0,22,12,2,36,21,10,23,86,13,28,51,9,128,48,14,32,50,7,3,96,137,54,4,202,49,37,65,208,323 }, - { 219,98,23,127,301,51,258,308,170,910,13,165,22,105,293,616,125,242,276,401,201,395,964,115,55,284,31,374,327,206,512,900 }, - { 64,180,80,165,5,237,2,250,34,58,297,61,197,17,22,29,186,498,231,445,247,3,752,311,95,32,483,153,27,45,115,469 }, - { 13,77,23,33,0,2,1,64,141,51,102,10,15,3,115,40,180,6,515,128,7,22,269,202,4,217,700,5,177,117,14,165 }, - { 15,120,51,515,13,450,23,700,202,153,196,753,260,64,128,141,730,4,326,386,21,523,33,318,5,457,95,32,403,1,77,269 }, - { 2,1,5,29,32,45,207,263,14,425,58,72,76,21,7,408,48,46,52,186,17,292,38,6,61,89,476,50,155,720,119,3 }, - { 15,515,700,753,4,13,11,5,1,23,33,21,3,141,32,2,40,180,117,64,269,202,102,197,0,165,120,51,341,352,153,12 }, - { 76,5,214,129,2,123,45,710,17,249,618,460,179,32,1,257,205,519,90,207,245,184,162,61,769,209,292,106,6,29,14,128 }, - { 1,15,23,13,120,141,51,515,202,21,700,165,0,180,137,2,5,77,128,93,753,260,269,197,326,33,110,352,82,102,318,48 }, - { 7,2,135,14,29,87,66,52,97,172,70,112,5,58,46,337,92,16,20,43,1,38,232,155,74,294,6,461,409,151,262,32 }, - { 574,187,384,926,860,110,258,434,269,531,141,244,160,261,253,116,699,959,940,717,533,36,219,31,902,661,871,295,201,352,10,260 }, - { 156,354,296,1,182,586,64,379,340,937,850,698,31,48,98,44,120,18,163,23,30,658,195,125,77,284,223,291,774,481,96,39 }, - { 250,80,34,472,17,495,176,469,33,194,64,483,4,297,141,14,161,27,53,667,56,833,73,527,585,231,106,51,84,814,2,59 }, - { 97,7,81,140,66,92,172,192,24,298,43,6,74,69,314,426,462,14,501,16,21,508,60,189,267,232,230,104,48,20,135,330 }, - { 31,44,116,144,268,393,492,434,367,489,127,98,918,0,384,9,22,206,948,105,93,203,1,456,332,940,299,28,137,49,293,125 }, - { 15,128,33,3,13,51,141,1,202,64,23,2,515,120,102,0,5,82,10,700,165,197,269,153,403,110,753,137,196,318,117,12 }, - { 31,98,127,9,0,105,22,28,44,512,293,395,299,1,242,49,685,763,320,599,125,116,109,276,284,95,870,159,23,456,36,900 }, - { 
7,24,124,1,6,97,2,69,14,18,23,92,21,67,66,16,5,484,43,20,118,65,36,22,28,0,51,140,13,71,29,150 }, - { 1,64,442,303,284,349,202,141,622,67,154,447,260,44,652,429,9,335,237,919,197,98,167,33,682,269,547,77,863,411,340,201 }, - { 1,15,2,141,515,0,700,13,3,180,10,753,5,64,77,33,4,6,7,197,102,269,165,23,134,11,352,341,291,349,22,120 }, - { 99,139,12,453,196,277,775,40,475,33,23,401,215,51,11,14,77,111,313,130,38,211,37,266,129,15,339,153,719,3,369,515 }, - { 33,77,102,4,23,128,13,141,202,64,51,0,40,59,269,115,117,137,153,1,318,11,10,177,15,134,22,90,196,2,403,32 }, - { 7,2,14,58,70,112,16,5,87,38,46,52,6,128,135,1,32,21,155,29,66,64,0,97,92,186,172,294,13,23,20,37 }, - { 15,13,515,1,700,2,23,0,753,5,3,4,51,10,341,115,365,180,11,33,317,77,6,7,217,12,197,165,117,9,64,102 }, - { 2,1,14,29,75,69,67,6,52,46,38,24,103,220,83,25,70,87,262,74,96,267,50,366,26,16,226,394,357,66,108,19 }, - { 9,105,18,39,1,0,16,557,101,272,252,890,326,49,265,21,137,100,23,938,13,310,159,5,31,24,254,51,30,128,202,132 }, - { 80,209,45,61,667,17,6,106,5,2,151,29,483,255,454,833,27,311,112,19,738,378,1,58,113,26,25,469,119,887,32,64 }, - { 13,23,51,15,5,1,515,0,21,2,12,141,700,165,202,115,753,32,180,4,3,197,10,120,457,9,269,128,64,341,7,33 }, - { 99,12,453,277,139,157,369,474,339,51,38,23,37,196,188,401,775,111,11,313,328,475,153,266,4,471,79,40,33,629,102,14 }, - { 7,92,16,232,97,140,126,14,60,107,66,35,298,387,314,104,246,462,441,150,0,38,24,2,172,357,230,330,5,633,22,289 }, - { 13,77,23,202,318,141,33,4,51,269,102,177,115,403,137,2,40,494,90,11,342,128,31,117,21,32,7,12,64,134,14,10 }, - { 13,2,0,23,141,1,77,3,180,33,6,64,15,10,115,51,4,5,217,197,7,165,515,102,22,11,700,269,40,352,177,14 }, - { 15,515,700,753,4,11,1,93,13,5,180,110,82,21,120,23,2,33,10,141,3,165,197,102,901,0,32,341,117,40,153,12 }, - { 15,515,700,753,1,13,0,2,23,4,77,51,3,5,341,291,7,33,6,115,10,9,8,217,11,177,120,180,102,165,197,365 }, - { 
20,43,198,325,173,904,104,234,66,147,77,319,416,422,97,426,5,0,7,450,861,202,712,725,2,32,639,376,38,324,945,315 }, - { 105,0,9,28,49,301,170,1,127,159,22,16,31,98,512,623,24,109,158,395,35,68,371,65,713,55,2,242,293,21,44,18 }, - { 213,88,689,466,230,30,321,435,699,352,217,201,795,831,144,854,1,443,96,539,530,840,418,251,855,190,93,100,669,31,957,662 }, - { 130,453,47,196,4,57,14,59,236,711,51,153,730,77,412,381,23,202,108,128,361,13,283,117,11,719,200,46,34,78,210,2 }, - { 1,2,5,14,0,50,36,22,38,46,65,67,12,86,114,28,103,29,208,7,10,128,21,83,218,23,96,54,194,6,133,51 }, - { 6,26,74,19,165,453,14,730,1,125,197,50,29,51,138,357,13,2,108,391,70,719,46,457,47,500,386,262,112,23,235,52 }, - { 9,10,376,20,43,0,49,18,30,120,2,33,325,104,501,470,77,788,725,102,523,39,858,5,904,414,174,55,137,37,342,13 }, - { 15,515,700,753,0,1,13,23,51,77,120,202,341,82,5,4,9,260,2,137,141,128,115,351,901,8,180,10,197,21,450,33 }, - { 105,131,272,578,9,49,371,219,159,616,286,320,224,187,716,98,28,22,0,623,127,258,910,737,385,31,239,347,254,109,424,95 }, - { 457,51,13,23,961,12,719,99,453,15,4,515,165,401,629,3,700,11,17,14,2,37,753,41,57,569,38,45,0,33,5,32 }, - { 202,120,5,33,318,77,450,102,1,260,403,128,494,21,165,13,269,12,326,23,342,523,402,2,817,64,15,141,125,82,457,475 }, - { 141,269,352,217,180,64,349,137,202,160,317,15,372,515,700,752,318,753,244,13,437,291,165,864,22,237,5,82,954,21,77,418 }, - { 70,29,2,145,74,112,26,6,75,52,19,66,632,1,87,220,5,135,163,287,307,25,226,7,58,396,294,278,113,409,69,151 }, - { 82,351,317,15,752,180,898,352,141,901,515,341,10,700,365,1,753,498,0,217,253,115,55,854,33,5,143,32,21,160,36,197 }, - { 39,9,310,254,0,30,101,49,252,272,100,265,105,455,159,557,190,333,286,688,18,166,1,158,709,16,625,627,31,131,327,329 }, - { 2,58,29,5,1,151,186,52,70,45,7,549,14,75,112,400,113,155,61,46,227,163,311,315,66,6,307,27,17,220,287,74 }, - { 141,217,13,21,352,23,269,77,180,115,317,64,202,15,349,137,5,51,165,291,318,752,372,4,0,102,33,365,197,32,341,125 }, - { 
68,35,0,9,65,101,149,124,24,154,175,16,28,7,67,1,18,189,114,398,55,14,345,39,118,133,69,2,230,429,71,283 }, - { 66,7,29,2,112,52,20,43,97,151,74,192,135,5,173,525,337,45,145,58,415,25,14,32,644,70,544,226,222,21,6,580 }, - { 31,125,44,22,116,299,242,55,1,170,64,36,479,870,456,685,10,599,558,0,268,506,28,740,23,903,492,164,393,206,2,86 }, - { 188,11,79,12,99,377,94,33,542,339,40,474,111,37,4,51,102,453,139,775,13,475,23,961,277,471,134,57,431,266,115,117 }, - { 658,698,340,98,296,303,1,31,850,363,156,919,44,774,586,385,120,77,82,10,223,30,354,291,23,914,478,87,260,163,48,13 }, - { 15,515,700,753,82,4,1,13,901,33,197,11,5,10,23,165,2,0,180,3,21,77,51,120,365,115,217,40,117,102,32,401 }, - { 15,515,700,753,4,11,5,13,1,141,3,180,23,202,21,2,269,64,165,33,40,32,0,318,120,128,12,197,117,352,51,17 }, - { 91,6,233,85,370,718,81,65,25,256,63,343,42,74,235,123,138,511,397,249,26,194,650,355,64,87,544,18,90,643,66,214 }, - { 23,13,202,51,21,120,1,5,141,128,450,64,318,403,15,137,260,33,12,48,32,31,125,494,269,102,165,515,77,2,197,14 }, - { 180,317,365,341,752,217,115,352,901,482,372,498,1,141,15,253,515,244,2,700,0,21,13,82,23,4,579,351,753,291,269,77 }, - { 13,115,197,341,9,352,468,237,64,498,23,165,22,509,901,546,482,180,28,569,317,51,365,873,391,95,86,217,49,837,752,706 }, - { 13,23,51,1,141,5,165,202,21,120,64,125,180,15,2,33,197,115,128,32,260,269,12,82,4,515,137,7,318,93,0,700 }, - { 214,289,90,174,874,6,138,280,65,81,64,85,355,751,194,233,312,348,835,91,0,32,343,636,249,29,875,288,519,104,247,74 }, - { 15,515,700,753,4,5,11,13,1,33,23,21,2,3,102,32,141,77,180,117,31,64,0,40,134,196,120,352,12,44,197,6 }, - { 33,15,13,515,117,23,700,217,134,753,0,51,153,77,141,2,4,64,196,1,3,180,10,115,5,102,6,11,22,202,165,7 }, - { 15,515,700,753,33,4,77,102,1,40,13,117,11,115,134,5,21,153,23,217,3,32,2,317,120,196,180,141,51,12,59,260 }, - { 15,515,700,753,13,0,1,23,2,217,51,3,4,5,8,317,115,9,341,10,202,180,6,365,7,82,457,22,120,901,33,291 }, - { 
7,2,135,20,97,14,66,52,337,673,192,29,43,355,353,5,16,294,107,376,147,226,331,560,64,470,222,104,415,32,4,324 }, - { 195,132,142,167,146,77,363,271,121,354,202,120,647,178,786,212,687,0,101,878,16,522,60,5,450,411,35,55,98,639,259,318 }, - { 202,77,20,0,318,66,104,128,102,269,177,43,33,7,216,291,494,5,2,342,74,173,97,112,450,22,337,10,234,52,64,678 }, - { 107,362,612,356,359,97,414,43,259,20,392,7,298,147,819,683,465,173,729,660,319,14,5,779,581,595,246,35,501,92,0,230 }, - { 6,165,14,453,13,51,19,23,386,457,74,391,308,2,26,401,47,758,603,108,719,366,1,29,309,730,324,197,133,70,115,867 }, - { 179,72,205,180,247,245,4,490,352,59,317,152,79,498,94,217,148,76,752,864,11,216,141,405,89,452,197,111,497,188,37,21 }, - { 107,7,298,314,14,359,32,392,232,279,172,97,60,581,387,126,121,0,534,493,356,92,441,95,13,21,35,147,22,5,16,362 }, - { 156,271,354,586,360,132,591,195,121,18,340,1,5,13,21,48,668,446,23,463,296,658,60,55,407,698,146,70,626,51,163,24 }, - { 13,23,51,4,0,12,457,15,11,453,2,515,5,1,99,10,115,165,700,475,401,403,3,961,40,14,37,753,719,32,64,569 }, - { 48,125,21,165,13,221,23,763,423,508,197,5,98,92,193,16,441,386,64,314,293,457,391,140,49,60,102,693,683,51,35,867 }, - { 202,77,120,450,5,318,1,494,0,195,18,132,523,403,326,604,354,260,121,576,203,167,234,817,682,49,35,615,21,20,13,102 }, - { 39,9,166,30,0,101,158,68,404,190,333,274,252,310,88,100,49,28,344,35,21,22,419,131,438,1,16,65,530,694,124,10 }, - { 15,515,700,753,110,4,1,11,165,180,93,13,82,5,2,197,33,120,0,3,10,23,21,115,901,217,341,77,317,51,32,117 }, - { 2,29,1,14,6,52,5,46,50,26,70,19,103,58,38,67,96,262,516,309,218,133,108,27,75,17,112,114,24,487,331,83 }, - { 120,77,15,13,1,141,260,23,515,217,110,51,137,700,317,202,165,291,180,21,753,128,0,177,326,93,450,82,64,269,197,5 }, - { 255,59,554,297,183,56,33,444,108,358,123,196,269,122,77,153,57,177,117,730,19,467,605,130,128,50,275,4,291,475,134,133 }, - { 
13,23,51,12,153,14,117,120,165,134,99,401,38,453,15,128,197,719,64,515,475,403,37,33,196,700,40,125,5,0,54,2 }, - { 64,33,174,348,95,108,467,554,56,0,25,306,233,6,63,511,343,120,13,85,29,561,543,707,319,180,899,355,77,49,256,18 }, - { 120,260,51,23,77,15,202,1,93,82,141,450,13,326,515,137,21,5,64,33,110,700,128,165,318,203,269,102,351,753,197,125 }, - { 15,515,700,753,4,13,11,1,5,21,23,2,33,64,3,180,32,141,22,102,77,0,10,93,82,352,117,40,341,31,165,6 }, - { 15,515,700,753,341,13,23,141,33,1,0,217,4,77,180,10,82,351,51,137,5,64,9,317,21,11,102,40,260,202,854,115 }, - { 105,272,131,22,327,286,28,239,320,9,109,578,219,49,98,224,95,159,538,371,616,127,187,64,713,55,0,170,168,258,716,623 }, - { 16,18,68,35,24,60,71,118,92,126,0,9,101,191,7,55,154,175,212,14,167,150,302,28,375,1,107,124,346,273,21,108 }, - { 20,147,43,470,376,142,904,178,427,798,0,595,198,325,858,319,61,202,173,97,5,422,14,22,107,259,32,49,887,77,414,392 }, - { 13,23,51,12,33,15,99,64,128,515,453,202,117,153,37,102,700,40,134,196,120,0,2,753,141,14,38,3,82,403,77,21 }, - { 383,17,62,136,84,119,56,440,3,504,240,80,378,129,123,548,106,128,4,11,14,555,162,32,184,361,59,64,205,5,469,57 }, - { 70,1,48,652,5,638,846,888,21,349,269,260,340,562,767,761,163,883,774,141,125,518,591,0,23,9,87,13,371,303,622,31 }, - { 66,135,6,97,74,278,69,7,14,324,267,172,2,140,462,1,357,38,808,550,92,841,189,29,16,25,298,87,75,204,24,335 }, - { 51,23,33,13,102,40,12,128,64,77,10,202,0,196,117,4,14,99,134,453,65,153,11,475,139,403,22,141,86,2,21,15 }, - { 88,100,264,166,274,435,772,1,382,921,96,478,30,438,639,909,897,521,190,466,960,410,9,144,530,418,31,329,265,691,778,93 }, - { 62,440,136,56,84,3,504,548,555,383,4,17,129,128,507,361,123,59,119,162,14,57,152,328,161,11,202,495,184,27,80,215 }, - { 911,617,332,959,206,141,253,244,282,384,110,120,10,260,352,143,951,811,269,373,160,417,93,531,728,203,434,940,137,55,36,717 }, - { 
120,15,260,141,77,1,515,82,700,351,33,23,450,13,110,326,64,217,269,753,203,137,102,5,165,21,51,291,93,177,373,128 }, - { 15,515,700,753,0,1,2,23,13,51,5,9,82,901,180,8,3,4,120,6,7,141,93,12,197,341,10,33,115,730,64,125 }, - { 7,104,97,107,356,232,66,560,298,289,14,707,38,568,359,64,20,0,65,324,22,214,92,32,192,5,387,43,712,90,172,95 }, - { 6,1,2,66,67,14,74,24,108,29,69,83,458,7,25,38,135,103,36,150,451,114,52,594,75,65,380,18,267,602,19,278 }, - { 13,23,51,12,115,21,202,5,457,15,4,1,64,719,0,403,2,3,453,165,99,141,401,128,32,515,10,37,523,197,120,700 }, - { 57,59,4,11,412,381,77,53,421,291,250,368,99,14,27,369,803,283,23,108,403,19,339,210,0,401,12,444,236,40,361,736 }, - { 15,515,700,1,0,753,2,13,23,5,51,180,3,115,6,7,457,4,9,8,12,82,197,165,141,901,120,719,33,64,21,22 }, - { 64,95,180,247,929,146,90,126,197,32,237,60,288,165,316,92,5,13,77,7,217,955,522,22,16,314,132,4,317,10,312,86 }, - { 15,1,120,13,23,515,0,51,700,180,141,2,5,202,21,260,753,165,137,33,77,110,197,128,326,7,450,4,102,9,269,12 }, - { 14,2,16,46,1,7,24,69,75,35,38,50,29,220,52,140,267,67,18,54,70,309,5,60,92,189,171,87,71,163,58,0 }, - { 31,98,127,44,9,299,0,276,293,284,116,49,935,599,105,22,456,201,28,1,39,125,242,137,371,144,131,492,159,272,51,395 }, - { 6,27,151,53,573,445,297,113,26,73,436,19,491,250,396,315,45,112,145,58,614,881,25,34,611,200,17,80,70,5,138,631 }, - { 32,693,81,788,90,804,403,56,494,21,84,397,202,65,18,77,64,681,214,725,523,784,526,33,102,825,240,0,115,241,817,91 }, - { 24,7,14,2,18,16,65,0,108,149,28,69,1,71,154,36,124,35,67,140,189,429,92,68,66,22,55,118,302,150,9,6 }, - { 0,68,9,35,65,101,189,212,114,67,124,69,1,154,149,39,230,64,252,16,88,702,103,100,18,336,28,329,520,83,30,755 }, - { 5,2,186,29,61,45,17,1,52,48,58,171,155,227,80,209,311,21,14,46,50,106,243,513,334,502,496,38,3,6,32,592 }, - { 15,515,700,753,13,1,2,0,3,4,5,23,341,11,10,33,6,51,165,117,153,7,180,12,365,901,77,569,197,115,64,9 }, - { 
13,15,23,515,0,51,1,700,4,2,753,10,3,5,12,77,33,961,165,457,197,11,115,9,22,102,40,403,202,21,14,59 }, - { 15,515,700,753,13,0,1,23,2,33,102,5,4,10,9,3,51,115,77,7,6,341,12,11,217,40,457,196,180,165,8,523 }, - { 166,39,30,274,190,100,333,438,530,310,88,252,0,9,539,265,1,656,404,101,625,131,778,254,31,455,676,329,724,158,21,23 }, - { 734,148,94,308,431,115,37,89,111,413,79,468,197,629,341,474,569,12,13,873,179,401,11,4,180,23,205,72,59,365,134,51 }, - { 539,228,224,219,816,190,30,258,871,840,669,93,406,530,957,187,160,531,748,137,131,88,863,36,728,839,44,213,352,116,202,466 }, - { 393,791,125,801,730,551,386,23,31,175,93,98,51,13,144,788,126,203,21,345,116,22,949,110,575,165,326,44,0,4,60,221 }, - { 13,23,77,141,0,4,51,2,33,115,64,1,10,3,6,15,11,102,7,217,180,40,515,22,128,177,202,9,700,269,165,5 }, - { 2,29,7,70,52,14,1,58,112,46,75,5,171,163,87,220,307,151,186,334,38,66,155,16,69,135,278,45,262,97,6,21 }, - { 88,321,213,100,230,435,689,466,1,382,30,352,217,699,410,96,795,36,921,752,190,141,144,180,44,831,317,83,443,31,840,251 }, - { 363,411,101,520,354,9,195,668,132,156,447,1,905,364,18,23,765,664,146,5,360,13,121,96,98,31,252,39,100,759,264,551 }, - { 13,23,51,730,12,719,453,457,401,475,5,21,403,2,0,1,15,4,3,899,99,32,165,11,515,308,197,115,6,961,700,523 }, - { 72,76,89,12,37,4,308,179,38,528,90,431,54,205,148,184,401,57,152,474,23,59,51,245,428,11,32,99,405,316,257,21 }, - { 376,20,43,147,470,173,97,595,107,319,414,142,819,5,729,178,858,7,427,32,426,104,14,0,392,362,259,61,230,77,560,246 }, - { 202,141,269,494,318,137,51,128,403,4,217,96,77,5,64,177,291,180,15,352,102,10,33,349,2,317,0,341,120,515,21,453 }, - { 77,202,33,128,102,318,494,269,13,0,117,23,342,291,403,15,134,51,153,141,177,515,82,137,196,700,203,64,22,351,753,4 }, - { 253,110,951,352,499,811,10,854,180,638,244,559,642,752,564,8,141,143,417,341,901,260,206,197,922,661,93,15,498,373,165,911 }, - { 141,13,23,180,4,217,5,1,269,317,21,0,2,202,115,51,352,77,3,197,64,341,318,15,291,9,137,93,32,165,515,33 
}, - { 9,0,18,252,16,101,68,39,24,118,35,109,158,329,28,167,60,364,333,265,49,100,22,419,553,55,1,677,71,7,212,159 }, - { 28,109,9,39,0,158,49,22,168,35,55,175,1,65,67,185,194,159,289,95,272,114,30,105,86,584,36,169,254,2,83,24 }, - { 15,515,13,700,1,753,2,23,0,3,4,5,33,341,11,51,6,10,197,115,901,180,77,40,102,12,365,165,141,217,7,317 }, - { 173,693,104,422,5,18,61,32,102,0,20,13,784,560,33,66,397,526,49,207,29,25,510,707,65,6,11,344,21,263,81,77 }, - { 23,13,386,51,308,801,719,221,401,949,21,730,165,421,102,115,125,33,341,670,468,117,770,1,120,6,197,14,403,97,67,958 }, - { 0,49,105,16,28,24,159,9,158,320,1,68,35,239,170,18,109,7,55,65,2,95,301,124,347,14,21,154,22,127,286,31 }, - { 2,5,1,207,45,29,32,58,76,61,6,263,292,655,72,14,17,476,7,119,52,306,70,64,21,90,186,214,106,38,3,790 }, - { 21,6,125,49,13,64,715,66,115,95,197,33,22,32,204,165,56,278,0,408,241,120,4,808,681,350,263,85,81,571,135,509 }, - { 612,427,325,107,202,5,376,49,64,392,403,470,21,147,31,788,494,14,362,465,858,98,20,804,518,43,845,318,125,97,725,534 }, - { 32,21,76,72,2,1,14,5,241,449,89,38,350,221,155,48,50,292,37,46,45,90,270,54,17,179,214,12,148,430,476,413 }, - { 24,0,28,16,7,124,35,154,14,149,65,18,9,68,55,108,175,71,2,1,22,109,92,67,484,336,118,69,302,398,570,420 }, - { 1,5,14,2,48,50,38,67,46,21,0,54,45,270,281,12,24,32,155,96,513,103,290,83,61,58,36,17,37,72,69,181 }, - { 13,961,569,197,37,15,23,474,515,94,148,111,12,165,629,341,700,79,901,401,51,405,753,10,134,4,115,734,873,11,89,117 }, - { 33,23,102,51,13,40,77,128,64,202,141,15,4,12,0,1,2,117,22,11,10,403,153,515,99,318,137,269,139,196,700,134 }, - { 0,1,24,67,9,16,18,35,28,69,103,50,5,2,65,12,83,68,7,96,14,22,21,149,75,114,13,133,23,71,218,54 }, - { 384,617,940,332,855,911,206,959,434,282,141,10,93,253,244,110,144,268,120,36,352,137,417,203,116,31,44,269,160,201,143,951 }, - { 30,93,473,137,31,704,450,652,190,203,800,254,166,274,326,144,269,160,127,303,120,625,88,848,110,435,77,521,349,131,340,744 }, - { 
53,27,73,26,19,250,297,200,25,630,17,6,611,122,34,42,714,235,472,65,436,14,80,684,690,106,45,113,680,108,64,4 }, - { 15,515,1,2,700,0,753,3,5,141,180,4,13,77,33,10,217,6,7,134,11,352,197,64,165,341,317,23,12,115,102,40 }, - { 254,530,39,613,688,221,30,31,438,190,228,960,1,44,141,21,180,406,23,166,9,202,13,96,137,48,131,829,317,269,393,51 }, - { 9,39,28,35,30,166,158,36,0,175,101,346,364,67,49,68,168,420,88,1,194,131,100,352,55,83,190,64,137,570,86,65 }, - { 62,56,3,548,555,507,440,161,34,4,215,136,162,514,361,527,17,14,211,130,328,11,383,123,84,183,38,57,184,152,205,494 }, - { 92,126,107,7,356,493,97,279,359,298,16,246,35,60,14,441,362,121,43,423,5,132,392,20,508,230,199,146,232,173,150,414 }, - { 15,82,141,515,291,922,349,700,217,260,372,120,351,93,77,753,318,352,373,854,1,326,269,21,13,102,144,202,64,23,203,137 }, - { 141,217,352,115,180,13,269,317,752,77,23,21,341,197,5,372,244,291,9,64,51,102,4,1,365,2,165,33,3,48,237,351 }, - { 78,47,390,19,130,453,108,27,711,813,730,444,412,283,196,690,123,14,128,26,250,389,650,236,200,65,51,4,34,183,297,73 }, - { 34,250,297,80,472,64,495,17,311,3,148,45,667,61,176,53,243,27,90,161,469,141,483,151,62,128,29,4,58,56,5,231 }, - { 51,23,33,13,551,77,102,326,421,21,523,120,5,899,453,692,202,153,308,615,115,958,450,401,791,68,221,93,475,18,403,4 }, - { 98,223,393,363,411,1,478,834,664,156,284,691,447,791,914,293,354,724,697,9,807,541,759,51,18,421,48,264,948,586,195,848 }, - { 7,14,107,232,16,92,2,60,46,5,359,121,24,526,220,620,135,1,172,21,126,314,132,77,18,75,32,278,12,23,52,38 }, - { 32,76,2,1,21,72,241,14,5,48,292,89,476,45,720,270,179,90,17,214,148,38,50,29,129,155,350,46,290,227,123,464 }, - { 15,515,700,753,13,23,33,77,51,4,102,0,32,202,1,11,128,82,117,141,40,5,110,8,3,90,137,21,10,318,403,165 }, - { 66,6,69,2,1,74,14,135,278,267,380,24,29,97,67,38,103,75,7,388,324,25,52,150,87,83,189,357,335,108,204,172 }, - { 152,4,339,59,79,471,188,11,77,94,128,33,529,377,12,111,102,202,452,402,216,99,13,542,51,40,474,37,64,291,23,961 }, 
- { 15,515,700,753,1,0,196,13,33,2,77,5,23,102,3,10,9,7,217,4,6,153,117,177,14,457,115,12,40,730,11,134 }, - { 17,209,45,106,207,5,255,119,62,2,61,3,263,742,306,655,425,378,32,56,29,136,84,80,311,58,186,240,243,383,14,21 }, - { 120,260,450,15,1,23,817,13,515,523,326,5,700,51,82,31,202,64,21,753,318,93,32,269,98,33,351,77,102,125,457,165 }, - { 116,492,268,93,23,206,203,0,551,918,13,51,8,22,417,940,120,10,499,31,949,791,125,523,165,473,341,730,421,959,401,391 }, - { 15,515,700,753,165,13,0,1,197,23,4,82,120,2,180,12,260,719,8,3,386,117,5,523,901,11,341,51,10,9,141,351 }, - { 14,24,69,7,2,66,108,1,67,6,36,398,18,267,150,97,29,38,83,149,65,74,28,0,189,71,388,16,273,124,46,22 }, - { 330,96,523,335,367,662,141,839,1,922,372,615,244,717,269,443,418,352,403,692,217,854,752,180,36,64,498,576,349,201,98,284 }, - { 184,90,257,205,245,229,57,152,769,17,524,5,32,497,45,432,619,2,452,266,4,106,1,21,179,59,76,3,460,292,381,128 }, - { 7,14,16,2,46,5,70,107,87,13,58,307,92,32,38,23,202,0,172,24,18,21,60,128,77,35,20,10,9,4,171,112 }, - { 7,66,140,16,14,92,97,69,267,172,189,24,380,2,35,60,298,451,230,135,314,74,150,71,38,357,6,330,67,423,21,443 }, - { 121,167,354,132,18,446,147,101,212,146,407,16,55,35,647,191,20,271,199,68,60,259,463,107,9,126,363,7,195,43,14,411 }, - { 76,90,179,32,205,21,184,460,257,288,45,245,316,5,57,152,241,2,358,1,229,72,524,148,48,769,17,4,12,38,14,720 }, - { 147,259,178,878,427,465,581,198,786,798,142,534,325,929,20,362,35,132,107,376,43,5,279,77,49,146,70,202,590,771,33,14 }, - { 473,93,450,778,141,30,855,466,144,203,330,530,88,523,459,372,201,617,839,704,254,321,934,326,39,36,82,717,332,213,559,403 }, - { 523,475,51,899,730,453,23,719,403,33,457,13,421,386,4,120,117,196,102,153,15,801,450,817,515,260,202,11,700,99,165,125 }, - { 15,1,13,515,0,2,700,5,23,753,4,3,341,317,10,115,180,11,33,64,217,77,117,165,197,7,6,365,9,141,102,134 }, - { 19,4,119,40,33,202,27,84,102,56,77,73,504,485,26,494,757,63,862,59,23,300,25,12,128,11,5,13,342,880,469,6 }, - { 
32,20,2,13,5,21,23,6,12,38,43,29,64,7,95,51,61,207,48,147,90,178,17,182,49,0,115,202,52,362,37,22 }, - { 339,188,11,79,4,94,377,12,99,111,542,102,37,33,474,51,471,40,453,152,77,13,59,403,342,23,117,57,475,134,128,38 }, - { 34,128,283,176,495,231,318,432,503,275,529,527,161,53,3,202,56,291,585,469,73,17,14,412,57,27,80,245,250,381,402,51 }, - { 15,515,13,700,1,217,141,120,23,180,753,115,365,51,317,341,77,260,0,291,110,137,202,5,21,269,64,36,349,2,4,10 }, - { 13,15,961,515,700,753,4,12,2,457,3,11,197,51,37,569,115,23,5,0,99,10,1,134,6,111,165,33,72,40,38,79 }, - { 15,515,700,753,13,1,0,2,23,33,5,3,10,4,9,115,7,102,6,51,12,217,77,11,40,457,569,341,117,317,14,719 }, - { 5,76,2,32,292,214,45,1,129,519,123,179,90,710,17,29,460,72,14,207,21,249,58,205,464,263,618,48,6,245,3,257 }, - { 72,76,32,4,21,12,38,23,99,54,89,3,14,17,51,57,11,90,13,488,179,2,59,148,45,37,5,115,401,1,10,421 }, - { 98,223,393,1,834,264,284,791,724,293,478,772,697,909,363,682,905,447,541,821,411,51,421,9,807,48,765,31,730,96,386,410 }, - { 341,13,509,8,23,638,165,901,762,10,569,242,391,197,873,642,506,499,629,961,15,180,116,456,206,546,417,1,338,457,515,867 }, - { 1,2,5,50,14,38,46,114,0,36,29,22,218,65,86,96,137,21,133,285,12,10,323,181,17,58,51,23,67,7,28,6 }, - { 481,878,202,13,5,23,182,32,269,21,1,318,77,142,557,494,141,33,640,137,70,291,2,51,260,415,929,403,120,58,4,259 }, - { 15,515,700,753,1,4,13,0,2,5,341,3,11,180,134,12,10,317,197,365,33,21,23,165,117,6,77,7,217,37,32,498 }, - { 25,119,19,6,26,42,27,17,4,790,45,814,2,469,483,84,122,1,0,33,32,128,76,80,611,113,73,56,5,240,202,77 }, - { 14,2,7,1,24,0,65,6,16,69,67,22,124,28,108,5,18,36,86,10,38,46,66,398,289,168,12,83,21,23,610,13 }, - { 51,23,128,13,15,202,12,120,33,64,141,82,10,515,0,403,700,3,1,99,117,269,153,165,753,5,318,197,102,260,2,137 }, - { 16,35,24,0,9,18,7,1,68,69,50,71,103,65,67,189,133,23,28,13,60,537,149,335,75,21,64,5,114,2,12,14 }, - { 
754,803,133,576,880,543,2,1,657,50,14,38,46,5,29,67,218,36,58,171,52,96,24,103,775,0,114,83,181,54,65,45 }, - { 21,32,5,3,2,17,14,72,76,1,12,23,38,51,4,54,10,0,89,13,99,137,45,36,421,115,543,11,22,128,221,48 }, - { 434,384,268,144,855,940,617,206,332,116,93,911,959,282,203,137,141,489,44,120,10,110,244,36,98,31,269,253,367,417,160,9 }, - { 15,2,1,0,13,515,5,700,3,23,180,217,141,10,753,4,117,6,77,33,64,7,11,197,352,317,341,134,165,115,12,9 }, - { 2,113,6,25,1,0,29,4,7,833,5,45,32,61,128,19,77,151,74,145,64,42,14,210,655,106,59,177,27,17,21,738 }, - { 116,268,918,203,551,31,8,692,206,791,403,499,417,93,940,421,0,23,22,120,13,523,44,51,299,473,959,1,10,475,202,125 }, - { 107,126,132,612,362,279,20,146,259,493,199,121,590,43,660,147,35,376,939,60,941,534,683,5,0,953,16,7,49,649,595,470 }, - { 15,515,700,753,13,1,0,23,2,33,77,4,3,51,5,102,115,10,9,341,6,7,11,342,217,12,120,180,40,317,141,8 }, - { 53,27,17,161,469,378,73,527,19,136,383,250,495,56,862,26,62,84,80,106,200,4,34,14,440,297,3,128,585,5,129,123 }, - { 17,45,209,106,5,207,243,454,119,255,2,263,186,290,29,3,21,62,425,61,84,32,58,56,48,408,655,136,306,14,742,227 }, - { 4,152,59,452,128,79,216,11,339,471,529,188,94,77,202,12,291,33,318,377,99,51,23,5,402,349,32,474,102,13,205,111 }, - { 15,515,700,753,1,0,2,13,3,5,23,4,180,51,115,9,6,12,7,8,197,33,10,961,901,77,141,752,110,22,120,341 }, - { 951,752,638,811,351,642,180,253,10,341,197,901,110,873,8,244,15,352,165,898,143,515,564,762,499,55,365,700,82,753,141,854 }, - { 6,262,197,350,74,26,115,509,841,583,165,38,21,13,47,50,235,19,33,324,453,4,308,196,138,99,64,903,675,1,223,130 }, - { 125,165,391,23,386,221,21,13,558,457,51,867,197,115,401,758,77,97,308,791,7,180,48,120,963,451,743,89,603,134,403,450 }, - { 1,14,2,5,16,46,7,38,58,24,50,0,69,48,35,67,54,18,12,75,21,45,513,155,430,37,270,9,61,163,223,32 }, - { 23,13,51,0,12,15,4,1,115,2,515,453,10,457,5,3,202,21,165,700,403,11,37,64,77,401,9,197,753,59,475,99 }, - { 
129,84,17,56,27,495,19,548,80,123,162,378,3,504,161,469,618,73,40,53,4,26,205,184,106,183,62,6,257,128,862,12 }, - { 28,9,22,49,109,1,67,0,39,55,168,158,83,36,35,86,420,194,185,159,95,105,69,208,272,103,50,114,2,254,169,30 }, - { 242,391,8,456,116,13,23,492,341,165,867,51,499,457,479,638,338,509,719,10,1,642,417,762,401,93,206,268,901,569,22,197 }, - { 211,162,248,130,57,4,41,556,507,266,183,152,305,361,11,129,62,229,38,471,514,313,157,300,377,3,440,128,123,328,339,59 }, - { 7,92,97,16,298,140,60,126,14,35,279,314,232,246,43,230,508,173,71,107,423,24,150,779,20,189,66,18,607,21,0,653 }, - { 15,515,700,753,1,0,2,13,23,5,3,180,51,901,6,4,7,12,9,115,8,457,165,82,120,197,10,64,141,341,22,117 }, - { 0,18,403,25,523,74,6,24,42,91,22,102,13,51,49,193,475,681,95,85,730,64,899,397,273,750,247,673,32,805,757,288 }, - { 56,0,18,65,33,554,84,343,64,6,90,561,22,19,899,108,27,63,289,475,240,467,370,32,233,214,24,123,95,287,28,194 }, - { 31,98,127,9,0,44,293,105,395,299,49,242,28,22,599,116,1,284,276,125,456,685,763,159,272,623,23,935,393,144,201,137 }, - { 1,5,2,14,38,46,50,48,21,7,58,45,270,61,155,171,0,290,69,32,29,54,67,16,24,666,663,17,37,75,502,52 }, - { 23,51,13,453,457,12,719,4,15,99,401,2,961,3,11,730,475,515,0,1,165,115,629,700,14,17,403,40,5,33,37,64 }, - { 968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937 }, - { 2,1,14,29,67,103,6,46,52,75,24,133,38,218,83,309,36,108,70,114,96,5,238,74,25,26,220,236,65,50,69,87 }, - { 7,71,16,92,24,60,14,97,150,140,35,189,149,298,18,230,43,508,2,423,69,0,38,314,66,279,399,517,251,20,232,273 }, - { 23,1,120,51,13,202,77,141,260,21,15,5,128,82,2,450,269,165,102,318,48,32,137,515,125,64,12,115,351,180,33,7 }, - { 77,13,33,23,64,51,4,102,141,128,40,1,2,202,0,6,177,115,137,15,59,10,11,7,269,22,515,180,318,3,700,95 }, - { 101,9,18,363,264,520,411,604,676,682,905,271,16,821,167,0,621,364,39,100,121,118,166,781,647,252,1,848,447,265,404,60 }, - { 
144,203,326,382,166,418,93,88,96,822,1,141,859,77,744,438,110,269,921,367,521,274,100,39,494,120,403,473,217,576,13,291 }, - { 13,21,180,125,5,23,191,32,18,16,146,199,115,24,165,118,0,225,22,1,60,197,64,901,375,241,48,12,408,71,522,818 }, - { 15,515,700,753,13,0,23,8,1,51,82,102,2,33,4,9,180,165,5,77,10,110,12,197,120,260,18,326,351,403,22,457 }, - { 33,77,102,64,13,23,128,51,141,202,1,40,0,2,117,10,15,4,6,318,269,134,22,515,180,115,177,153,137,196,3,700 }, - { 174,544,104,525,74,0,151,25,6,624,29,66,2,636,81,45,204,177,64,416,7,644,5,138,222,319,355,77,22,122,789,216 }, - { 141,304,372,352,291,947,177,269,128,954,77,349,217,202,64,318,498,437,102,864,86,13,115,180,137,5,210,197,32,950,678,7 }, - { 161,200,53,17,714,27,34,73,472,62,585,56,440,383,136,78,527,19,4,3,106,361,14,250,80,514,377,84,322,390,862,548 }, - { 32,76,72,21,38,14,89,54,12,37,2,241,5,428,17,1,181,221,350,45,3,4,449,90,148,179,99,292,794,770,477,46 }, - { 33,23,128,64,141,13,77,51,102,202,2,15,1,3,40,10,5,153,269,515,165,0,117,196,180,318,6,700,137,134,120,22 }, - { 96,137,30,0,9,39,840,202,669,406,141,530,613,1,180,88,22,160,679,576,28,403,31,219,49,228,829,100,36,15,10,856 }, - { 180,141,352,1,15,752,115,0,217,365,2,515,13,901,341,317,23,4,197,700,269,5,3,31,753,244,21,165,253,202,51,44 }, - { 1,2,67,0,28,50,83,65,14,46,103,114,24,38,36,9,69,5,18,7,22,133,55,218,16,124,29,54,96,160,12,480 }, - { 180,115,352,317,365,217,752,901,141,15,341,1,515,253,700,0,753,873,2,197,31,137,165,244,4,120,160,44,98,5,202,3 }, - { 5,32,347,49,13,21,95,713,23,1,77,33,60,64,107,4,126,928,296,850,0,241,197,102,652,195,180,534,165,153,379,10 }, - { 341,180,365,901,317,115,15,752,515,700,217,873,753,82,0,110,197,141,951,165,1,564,13,351,253,12,10,3,2,4,308,244 }, - { 17,45,21,3,106,5,155,38,227,32,2,209,62,54,12,243,14,181,552,587,46,540,207,794,37,48,430,119,255,221,770,29 }, - { 16,24,35,18,7,0,50,1,9,14,75,69,2,5,12,21,60,13,67,71,23,48,10,108,223,181,189,103,46,64,92,51 }, - { 
127,13,98,165,308,23,286,293,258,51,219,395,197,115,301,401,31,391,22,105,457,170,239,276,55,338,629,116,180,479,509,569 }, - { 539,213,748,840,957,669,30,466,88,217,144,251,863,190,137,93,230,228,679,352,317,203,617,321,258,530,160,219,96,831,816,689 }, - { 5,48,1,21,2,14,0,36,12,38,32,54,430,181,50,270,72,99,281,45,17,10,46,22,37,218,67,3,290,76,23,51 }, - { 13,23,0,4,33,51,2,115,141,1,77,217,180,10,9,317,3,102,11,5,15,197,7,202,22,165,40,64,515,6,341,31 }, - { 13,15,117,515,23,12,37,134,165,700,38,54,457,753,51,64,153,197,14,10,33,82,961,0,99,89,115,719,141,3,4,1 }, - { 5,21,2,3,1,32,14,12,48,17,0,10,51,23,38,22,4,72,13,54,36,45,137,76,99,114,86,37,11,64,540,430 }, - { 202,128,77,318,291,33,269,102,275,141,494,342,40,678,0,177,20,210,402,7,4,5,137,6,13,450,403,32,49,120,23,22 }, - { 1,2,24,14,67,46,69,50,38,103,16,18,75,35,83,29,52,96,5,108,0,7,54,71,149,394,236,309,70,133,220,58 }, - { 15,515,1,700,0,2,753,13,23,5,180,3,51,4,165,457,12,197,115,6,7,21,9,141,8,901,33,82,120,77,10,110 }, - { 0,28,65,14,67,2,124,24,1,9,7,69,55,154,36,16,46,114,175,35,83,22,429,18,109,149,68,189,108,336,251,133 }, - { 56,162,403,3,129,775,99,161,17,40,527,33,880,4,14,128,475,12,548,23,102,202,361,117,34,184,383,200,183,196,64,53 }, - { 151,2,29,58,112,45,186,113,5,70,52,1,311,6,315,66,61,7,74,27,631,17,80,87,287,243,209,227,14,491,19,869 }, - { 6,1,74,2,75,29,25,66,26,70,52,138,67,324,357,42,19,220,14,85,87,108,38,451,309,103,24,69,380,135,114,65 }, - { 15,515,700,13,23,0,1,120,753,51,180,2,260,202,5,141,77,102,9,450,115,21,197,165,7,137,110,33,12,269,901,4 }, - { 5,45,17,2,14,46,48,38,181,50,155,3,186,54,61,29,21,227,281,80,540,106,12,400,52,1,58,32,328,171,209,487 }, - { 16,18,265,121,158,35,60,9,39,7,329,105,252,68,24,1,132,167,159,22,0,49,286,101,21,146,23,327,120,709,5,14 }, - { 108,467,283,56,389,650,123,412,33,177,899,475,216,453,269,349,619,65,51,730,403,670,23,196,523,128,84,13,401,789,503,543 }, - { 
514,3,11,377,328,4,361,507,57,403,14,880,130,485,176,215,236,38,152,102,211,56,62,757,54,585,300,556,34,555,40,229 }, - { 3,555,62,266,130,99,507,139,514,12,152,229,215,305,57,40,440,33,403,471,38,56,475,14,361,313,775,328,196,548,123,23 }, - { 120,202,318,15,77,13,1,450,33,269,515,260,5,128,494,51,23,700,102,141,40,753,326,403,817,137,523,21,177,922,342,7 }, - { 15,1,515,23,0,13,700,2,51,753,180,5,165,21,197,12,3,120,115,4,141,6,9,7,457,33,386,202,82,8,31,341 }, - { 15,180,515,82,351,700,10,317,753,115,217,365,141,898,33,901,13,23,110,854,752,77,1,197,4,341,143,36,64,352,102,9 }, - { 104,289,66,707,214,90,712,64,97,173,20,0,414,194,874,43,32,7,568,560,65,38,426,312,715,192,376,74,835,5,324,147 }, - { 84,56,0,554,63,65,453,249,123,643,18,26,847,475,511,403,416,561,524,289,370,73,9,19,45,42,719,194,27,467,33,730 }, - { 21,346,13,350,308,826,197,101,352,68,570,0,165,23,9,841,115,100,509,694,221,230,35,217,569,88,124,749,1,777,212,154 }, - { 16,92,7,24,60,18,35,140,126,14,50,71,46,330,2,75,246,5,121,267,571,1,230,309,220,0,9,64,146,236,54,108 }, - { 82,15,515,898,365,700,180,33,341,753,77,901,10,115,55,351,21,5,1,4,13,102,36,217,2,165,752,120,197,117,11,317 }, - { 16,24,35,18,69,71,140,1,103,7,189,68,0,50,9,108,2,133,60,267,230,46,149,67,167,118,92,14,75,21,191,38 }, - { 60,71,16,18,7,20,43,118,35,68,375,28,608,0,175,566,154,92,14,149,628,33,22,13,2,10,279,23,107,356,55,117 }, - { 187,258,871,295,201,434,219,224,489,384,268,110,261,839,44,699,93,116,36,131,141,228,144,160,940,567,244,406,137,574,98,253 }, - { 66,7,97,172,192,712,232,324,204,74,43,448,387,426,568,20,526,107,104,135,356,729,173,0,22,5,32,95,2,64,500,560 }, - { 15,515,700,753,1,4,0,341,13,3,134,2,5,33,11,77,12,10,23,197,365,901,7,40,217,32,21,6,51,180,961,37 }, - { 0,28,24,9,35,65,16,124,68,55,109,154,7,39,22,149,158,14,175,1,49,252,18,71,2,168,289,419,108,420,67,101 }, - { 7,16,14,92,2,46,140,24,220,35,38,60,75,1,50,18,87,54,5,126,29,52,278,262,314,107,71,21,172,135,330,394 }, - { 
7,92,16,14,172,126,2,60,140,35,135,314,278,46,24,38,232,107,330,66,5,18,150,246,230,97,52,1,121,563,279,21 }, - { 6,26,235,53,297,436,27,19,25,73,113,445,90,214,65,42,64,289,250,611,624,32,45,648,614,17,85,491,34,122,200,416 }, - { 352,141,1,217,854,752,351,180,244,36,110,661,82,258,816,160,295,219,567,224,230,269,922,144,260,268,93,201,137,116,489,202 }, - { 16,60,35,18,126,107,68,191,92,121,7,14,598,20,493,279,167,446,118,0,28,43,463,55,24,212,375,566,9,150,575,21 }, - { 15,1,515,2,4,13,0,700,3,5,23,753,341,77,51,115,33,11,180,10,197,141,6,165,7,901,102,40,9,202,217,12 }, - { 23,51,13,202,21,5,1,120,15,137,128,125,32,2,12,141,33,165,64,515,403,318,700,48,180,7,6,450,115,523,475,260 }, - { 131,716,224,371,219,187,737,616,385,254,9,98,105,924,31,258,836,39,127,578,49,916,44,761,272,137,944,159,0,242,442,22 }, - { 15,515,700,1,753,2,5,0,4,13,3,180,11,141,197,10,341,217,33,134,165,6,77,7,317,12,352,64,365,32,102,40 }, - { 66,74,7,173,174,29,192,2,222,20,226,43,353,52,712,6,0,138,500,204,97,145,64,104,426,673,355,90,25,5,65,87 }, - { 5,259,786,534,590,493,279,49,13,581,465,21,929,35,941,132,147,32,23,612,362,626,107,121,178,0,146,61,48,939,10,18 }, - { 2,14,16,7,278,69,135,140,46,24,267,35,92,38,1,189,29,52,309,60,66,75,71,172,74,357,18,87,67,6,230,5 }, - { 165,13,308,197,391,23,401,15,51,457,180,509,115,569,3,629,961,719,34,758,317,734,14,29,46,2,17,901,38,453,5,217 }, - { 1,22,2,14,0,28,7,168,67,49,65,24,36,95,5,105,55,35,12,46,69,16,114,159,194,50,10,9,158,83,164,109 }, - { 34,453,3,196,130,14,322,11,47,51,377,236,361,4,730,153,514,711,57,440,62,17,161,108,176,59,485,56,162,412,202,117 }, - { 18,16,21,23,48,13,24,35,121,5,156,60,51,1,7,132,141,221,163,115,0,271,447,340,363,202,125,71,2,781,22,698 }, - { 165,13,457,23,197,961,629,569,341,41,12,38,401,901,54,51,115,17,15,509,421,37,62,45,719,57,32,328,117,758,157,99 }, - { 2,1,77,141,33,64,3,102,0,23,13,5,128,10,6,15,180,202,269,40,51,515,7,165,137,117,318,4,700,153,197,352 }, - { 
68,212,0,124,101,9,154,16,562,191,21,149,65,24,35,1,118,167,818,350,520,100,722,841,264,71,13,302,478,23,375,346 }, - { 98,23,48,598,13,293,541,21,125,121,51,807,0,31,35,259,126,7,386,1,223,783,10,107,199,20,221,144,342,963,49,64 }, - { 21,13,5,586,1,23,167,48,33,781,647,49,165,18,51,271,77,32,761,118,0,82,391,22,146,141,459,31,197,156,115,4 }, - { 2,1,5,61,29,7,58,45,14,6,425,32,70,52,290,738,207,21,72,112,66,76,655,17,186,46,64,263,38,0,128,87 }, - { 39,265,9,100,1,333,363,101,18,411,447,254,166,310,31,98,264,30,639,404,156,286,16,93,593,203,272,682,0,905,44,821 }, - { 6,2,1,19,29,51,26,108,25,74,5,23,14,114,13,386,133,103,42,66,453,70,309,138,719,324,65,38,64,96,52,75 }, - { 20,43,356,107,49,858,595,7,414,359,0,5,392,319,97,612,422,819,14,376,173,246,22,470,147,427,230,92,197,33,683,95 }, - { 0,9,68,35,65,67,114,101,28,1,124,175,336,69,154,103,83,24,189,133,39,16,50,7,2,149,55,251,18,345,230,36 }, - { 23,13,51,15,0,1,515,115,165,2,5,12,700,202,4,21,141,457,753,197,10,3,180,120,32,9,318,11,453,64,6,269 }, - { 121,195,60,16,126,107,98,271,146,407,132,35,1,167,199,223,493,191,279,20,18,5,43,7,21,92,48,393,0,362,212,467 }, - { 31,44,299,116,393,144,492,456,268,22,105,0,367,918,384,434,127,489,98,9,963,125,242,948,1,28,206,49,36,51,93,293 }, - { 23,13,457,51,165,401,719,758,197,453,961,629,308,14,15,12,730,3,386,569,391,29,739,515,34,828,832,901,115,514,670,341 }, - { 105,36,131,22,180,115,341,127,169,1,9,31,64,98,44,365,317,141,272,143,160,55,219,86,197,776,239,187,0,535,13,752 }, - { 1,6,2,14,66,25,29,5,108,67,65,114,19,38,26,52,74,7,24,18,69,86,36,388,64,51,17,83,23,46,42,75 }, - { 51,386,23,453,719,13,730,6,457,670,758,19,401,165,2,475,47,26,899,14,108,17,1,5,197,29,894,754,236,74,27,285 }, - { 252,18,9,101,121,16,132,0,419,167,364,60,604,35,265,363,146,271,39,158,68,109,28,329,848,24,647,907,682,159,212,55 }, - { 283,503,128,432,26,193,63,269,789,529,102,122,389,275,678,6,25,318,445,4,342,27,573,605,177,862,643,291,216,57,235,59 }, - { 
2,1,29,75,69,52,14,6,46,74,87,7,220,226,278,38,135,66,267,70,16,262,25,24,380,324,357,140,67,394,97,222 }, - { 97,298,69,7,66,140,189,24,16,267,172,423,60,150,14,314,92,71,81,501,43,35,74,6,517,232,149,607,83,330,18,2 }, - { 475,421,403,899,51,805,523,958,453,817,23,615,401,801,120,326,202,670,494,730,450,386,115,629,260,576,77,365,569,0,165,13 }, - { 7,20,14,128,77,97,112,202,2,177,16,415,269,318,275,66,107,43,141,414,135,38,307,10,58,0,6,291,32,5,4,40 }, - { 24,14,7,0,2,1,22,28,16,65,168,124,35,67,108,109,18,49,10,149,69,158,5,95,289,12,55,6,36,71,46,21 }, - { 26,80,27,73,122,25,19,17,6,42,684,209,445,573,667,106,45,690,4,611,255,680,297,495,65,59,128,119,483,113,64,53 }, - { 107,259,362,376,465,20,470,147,595,534,612,683,660,43,5,49,581,0,858,35,427,246,97,786,178,356,14,21,142,878,7,279 }, - { 131,30,228,190,856,406,224,88,219,530,863,613,778,274,944,816,187,39,100,160,258,31,44,93,1,321,539,36,871,137,435,531 }, - { 113,6,311,25,45,491,80,611,27,26,209,667,17,73,122,42,684,396,19,85,106,5,614,4,2,255,151,29,1,64,648,61 }, - { 15,515,700,753,0,1,23,51,120,2,13,82,5,260,9,4,341,77,180,115,141,10,7,12,450,8,202,901,197,351,165,93 }, - { 219,127,98,258,395,421,924,293,242,201,697,105,276,51,308,23,453,272,401,944,512,137,13,31,284,567,386,365,116,131,964,125 }, - { 15,180,352,141,515,752,217,82,1,317,854,700,351,753,115,341,110,13,260,120,21,36,33,898,23,10,5,365,4,160,901,137 }, - { 129,123,17,257,162,184,205,249,183,769,5,80,3,4,229,130,119,45,90,99,618,106,57,497,12,128,2,84,59,152,27,40 }, - { 33,102,23,77,64,128,51,13,0,202,10,141,40,15,1,22,117,137,2,86,4,403,269,153,515,196,65,11,700,115,99,5 }, - { 7,14,2,16,172,107,46,92,5,135,35,202,294,87,38,232,29,97,20,21,24,1,60,220,66,43,12,0,126,52,54,70 }, - { 403,576,615,523,475,326,805,817,494,421,51,202,120,450,137,453,23,859,260,401,402,77,33,670,0,958,15,197,386,515,165,480 }, - { 141,352,217,137,0,180,202,349,9,269,23,51,115,291,77,372,13,317,120,752,365,351,93,22,2,341,64,10,82,854,28,18 }, - { 
1,23,13,51,202,141,5,165,21,15,120,180,64,2,197,125,33,102,12,7,137,515,48,128,269,318,93,700,0,403,9,4 }, - { 25,151,6,145,122,29,174,45,113,74,4,665,42,138,2,614,416,287,19,348,746,0,66,26,1,7,64,243,311,396,81,624 }, - { 30,190,254,166,100,382,731,829,88,131,264,795,9,93,625,274,438,1,578,613,716,31,44,39,530,36,616,921,265,203,160,77 }, - { 132,5,21,13,1,23,32,195,379,687,156,121,626,296,48,70,850,146,51,82,883,771,35,49,652,407,60,4,260,0,845,33 }, - { 9,254,0,49,272,131,39,159,688,101,105,578,518,158,286,28,327,333,68,224,252,219,344,16,22,1,716,31,30,228,24,890 }, - { 16,7,35,60,18,20,14,68,9,0,28,118,43,92,126,55,107,2,101,154,24,71,5,202,121,109,22,252,21,97,1,621 }, - { 15,515,700,753,13,1,341,2,0,4,3,5,11,23,10,33,117,12,901,197,6,134,77,8,165,317,21,365,217,7,17,40 }, - { 78,19,444,47,26,390,27,453,130,813,108,730,711,65,412,122,51,680,113,235,690,196,630,283,128,236,14,64,73,53,200,445 }, - { 2,7,29,5,61,6,45,1,66,113,112,14,52,315,738,128,32,151,74,16,20,64,70,21,592,0,25,4,425,43,491,222 }, - { 145,112,74,66,6,29,26,70,19,396,25,87,2,287,135,151,138,222,5,226,42,122,7,307,1,644,45,58,113,651,635,632 }, - { 92,16,7,60,126,24,140,35,14,232,18,121,246,71,46,267,172,150,107,314,132,146,230,2,278,108,330,199,236,5,38,572 }, - { 13,115,197,538,569,341,98,55,165,127,365,762,219,286,844,23,170,206,734,638,535,901,169,253,629,0,873,509,180,10,332,258 }, - { 58,151,74,53,287,27,29,396,6,70,2,73,5,52,112,26,651,1,297,113,17,75,19,45,334,445,145,34,315,549,436,331 }, - { 214,289,90,874,104,751,64,65,312,835,204,249,750,194,74,81,875,32,519,288,348,0,174,247,636,715,138,192,784,6,524,280 }, - { 9,39,28,35,30,0,166,49,1,175,439,158,64,346,36,101,67,364,86,88,274,100,168,55,23,10,420,22,190,141,505,180 }, - { 341,901,15,515,700,753,1,365,10,0,569,180,2,197,115,31,165,3,5,4,44,22,317,13,9,951,23,253,116,143,762,93 }, - { 120,202,77,450,260,15,128,318,102,515,494,13,817,700,269,5,403,51,1,33,23,753,82,326,141,342,291,137,21,523,351,32 }, - { 
13,115,241,64,180,32,125,197,165,4,118,22,21,23,16,247,237,28,225,191,95,141,167,5,0,341,288,35,459,18,177,24 }, - { 16,24,35,14,1,2,7,69,18,46,60,50,267,140,71,189,108,38,75,92,0,5,9,230,67,21,309,335,54,236,394,220 }, - { 15,515,700,753,898,180,901,341,197,638,10,165,33,1,115,4,77,365,317,13,102,217,117,0,5,2,253,3,82,569,21,752 }, - { 193,523,18,84,56,730,233,65,4,817,90,33,643,403,91,511,453,240,59,11,214,51,719,196,153,475,32,123,64,847,102,561 }, - { 112,29,151,2,74,6,66,7,222,145,287,45,5,624,52,25,113,416,58,122,19,70,186,204,4,87,644,549,337,884,32,0 }, - { 13,0,23,2,1,15,33,3,77,515,141,5,4,217,10,51,64,180,700,115,6,117,11,7,753,40,102,165,197,22,317,153 }, - { 28,0,1,67,65,9,2,114,83,69,103,50,36,22,55,24,46,14,124,109,35,7,16,38,133,160,389,323,18,12,154,5 }, - { 121,132,18,167,271,146,101,363,621,9,411,647,16,354,520,60,212,932,1,806,55,0,195,446,68,35,31,364,777,252,407,118 }, - { 26,6,85,396,122,624,25,19,42,445,64,648,573,416,174,680,665,214,45,348,90,65,194,145,113,881,138,289,112,436,297,544 }, - { 16,146,18,92,24,199,60,71,121,126,35,108,156,953,271,674,132,7,32,640,360,246,649,118,21,95,5,517,14,9,1,314 }, - { 51,13,23,453,475,730,719,15,457,403,64,115,33,95,4,523,3,12,21,6,899,102,5,128,401,202,11,141,308,515,22,125 }, - { 151,396,6,53,27,113,58,26,73,112,74,287,45,29,297,19,145,70,138,445,315,436,34,2,17,573,5,61,549,491,1,80 }, - { 223,1,888,774,260,98,269,385,349,202,96,141,421,622,730,863,318,697,87,453,393,418,922,834,751,5,163,335,120,291,352,30 }, - { 16,60,92,35,126,121,7,150,246,18,107,1,598,24,167,195,14,97,71,279,98,441,191,199,517,146,356,223,298,271,230,0 }, - { 22,1,105,28,239,170,0,55,95,31,36,301,2,320,98,127,9,49,44,64,35,67,10,86,5,12,109,23,168,13,21,312 }, - { 2,6,5,207,292,76,1,119,45,32,17,29,61,306,790,58,240,106,14,64,214,151,476,710,7,72,84,128,4,179,70,25 }, - { 51,23,221,254,115,13,438,530,125,48,21,39,541,960,386,49,1,613,15,840,228,308,627,131,688,401,5,326,421,158,165,83 }, - { 
1,5,2,0,12,22,21,36,10,14,48,86,23,13,32,54,3,4,28,65,51,50,137,37,208,114,9,38,17,7,281,202 }, - { 363,23,447,182,296,340,1,93,698,478,379,156,284,144,18,269,21,98,141,70,668,411,664,658,110,914,67,937,180,691,335,291 }, - { 17,32,45,498,41,115,180,197,106,62,54,38,546,165,13,155,468,509,341,243,241,217,542,15,57,536,428,51,117,721,292,129 }, - { 32,95,64,246,22,92,180,13,5,652,125,241,638,237,7,49,4,126,21,115,197,296,888,316,0,165,774,23,16,392,1,534 }, - { 15,515,700,753,33,341,13,217,4,141,77,23,180,317,1,10,102,351,82,115,40,5,854,21,137,11,352,901,365,117,197,0 }, - { 15,120,1,82,93,217,515,260,77,141,13,110,700,351,352,23,180,753,21,854,202,317,64,349,269,51,165,137,5,128,291,36 }, - { 13,23,51,141,77,0,33,4,115,64,2,10,102,202,217,128,1,177,269,11,7,22,6,21,32,9,180,40,15,3,165,318 }, - { 478,264,1,520,98,724,9,682,223,664,21,759,13,772,604,100,23,363,411,48,821,5,0,905,909,447,31,265,88,101,166,39 }, - { 20,29,7,2,77,416,6,128,33,5,0,113,104,32,43,13,491,66,23,21,102,51,74,210,202,525,64,318,10,81,174,14 }, - { 2,1,5,14,7,58,61,29,45,290,46,38,52,21,32,270,6,592,425,0,75,155,16,48,17,50,72,70,207,24,263,663 }, - { 80,6,17,209,106,26,483,113,19,469,255,25,378,27,495,833,45,64,161,2,61,667,76,742,32,90,445,5,814,65,887,119 }, - { 98,223,393,1,354,834,195,791,447,697,284,293,360,541,781,156,51,807,18,664,421,411,163,668,48,31,591,765,883,386,948,23 }, - { 679,141,816,36,93,406,876,144,228,137,1,180,669,21,332,251,5,269,116,187,96,351,202,752,317,64,203,831,574,466,855,345 }, - { 15,515,700,13,1,753,2,0,23,341,3,5,4,10,51,11,33,165,6,7,115,197,12,64,180,153,217,77,9,569,901,317 }, - { 13,23,202,51,5,21,403,15,120,64,1,450,128,141,12,523,33,165,494,125,2,515,269,7,48,102,318,95,260,180,453,197 }, - { 16,18,24,60,71,92,146,246,199,35,140,7,9,118,121,108,167,230,126,132,0,640,156,14,68,133,267,360,649,271,64,55 }, - { 269,141,678,177,202,77,128,318,33,947,40,120,291,349,102,137,64,352,210,864,461,498,13,342,196,23,275,450,954,0,205,111 }, - { 
16,24,92,18,71,60,35,7,108,191,167,246,140,14,126,21,1,68,150,118,149,388,399,9,273,0,121,796,230,48,212,517 }, - { 2,14,1,29,46,75,52,70,69,171,38,7,58,163,16,5,24,220,67,112,223,54,50,409,155,35,267,186,151,334,394,140 }, - { 9,252,100,265,166,39,88,404,329,0,1,520,382,812,101,593,264,274,604,676,30,118,68,553,18,664,363,23,639,865,21,411 }, - { 16,18,35,24,0,60,158,7,22,68,14,49,109,159,55,9,28,71,2,10,5,105,1,118,329,13,344,23,92,20,21,126 }, - { 15,13,515,700,23,0,753,1,51,2,4,10,77,5,3,197,115,165,961,202,9,457,180,12,141,22,33,120,6,11,318,31 }, - { 160,93,251,137,317,1,180,36,120,217,345,752,617,352,332,10,96,531,498,318,365,202,141,269,816,341,901,679,143,35,83,968 }, - { 6,25,42,128,19,59,122,4,85,26,611,27,269,233,45,0,343,91,318,80,11,177,283,73,33,614,2,77,64,138,445,216 }, - { 95,64,74,7,32,81,51,204,0,20,237,65,56,38,91,23,207,180,347,343,29,6,511,52,49,10,25,18,554,370,14,312 }, - { 202,120,326,260,450,817,494,318,137,403,128,77,523,553,859,5,704,1,15,23,13,576,7,16,615,51,682,291,515,0,21,234 }, - { 20,43,107,356,362,126,595,92,359,7,422,319,493,16,858,5,392,246,414,683,60,0,35,945,441,21,259,819,49,97,279,173 }, - { 25,42,6,77,33,102,0,122,4,690,29,483,210,27,21,19,2,300,18,648,680,119,117,59,1,10,342,12,26,153,91,684 }, - { 31,44,299,116,125,242,456,599,22,393,0,144,492,28,268,1,9,963,301,105,367,36,127,170,384,434,206,98,918,10,13,93 }, - { 410,521,686,367,662,88,335,321,201,96,98,772,144,1,934,921,443,435,284,274,264,551,120,897,44,100,33,225,744,418,909,960 }, - { 142,178,878,234,132,786,195,202,77,416,147,929,146,522,167,259,687,639,450,271,626,481,590,5,198,212,771,49,0,465,315,427 }, - { 254,39,131,9,272,0,578,716,310,224,30,49,105,827,518,829,166,333,616,228,613,846,101,219,1,31,890,98,159,938,252,100 }, - { 230,699,854,473,450,351,831,137,855,217,352,704,800,202,251,498,160,144,206,203,317,201,253,752,418,141,1,332,82,180,443,36 }, - { 
403,202,475,453,494,23,51,77,318,402,13,33,128,102,137,141,120,342,269,0,450,4,899,576,40,421,275,117,217,177,196,64 }, - { 23,44,98,182,291,144,116,39,110,141,96,82,905,70,367,264,125,93,77,411,120,1,658,202,100,415,107,363,197,30,447,105 }, - { 15,515,1,13,700,23,77,120,0,753,51,180,202,141,260,5,21,115,2,137,128,9,450,197,365,269,12,326,110,102,318,7 }, - { 0,32,18,95,207,577,193,29,61,104,64,784,715,102,693,887,81,91,583,671,403,5,52,474,397,180,138,49,37,344,38,263 }, - { 0,101,9,68,252,16,100,39,166,364,124,24,154,265,212,88,18,35,329,419,28,118,71,30,65,158,191,55,1,694,21,676 }, - { 16,24,191,18,35,71,167,118,149,68,212,9,0,1,21,108,101,92,60,375,302,7,589,755,124,674,350,48,562,246,13,363 }, - { 2,14,69,24,1,67,46,16,38,103,29,267,7,35,189,135,278,71,108,18,83,309,52,6,149,388,75,236,60,0,150,66 }, - { 13,1,23,0,4,2,51,15,180,33,3,115,5,515,141,10,77,700,11,9,197,341,202,165,217,102,22,7,753,317,365,6 }, - { 447,1,698,411,31,363,98,5,919,4,156,125,759,691,13,64,459,354,44,21,48,293,30,914,478,225,82,120,2,922,848,839 }, - { 854,82,351,217,141,180,352,15,515,752,1,700,317,898,753,244,10,21,922,115,77,36,4,260,64,110,372,13,5,365,120,11 }, - { 0,1,4,13,5,2,82,33,3,120,10,23,9,11,77,260,21,102,8,31,40,6,351,51,64,450,22,117,93,110,7,457 }, - { 51,13,403,23,12,475,1,2,21,5,453,523,115,202,817,7,0,99,3,6,450,120,494,64,22,95,49,899,10,37,32,141 }, - { 180,752,352,141,498,864,317,217,9,0,115,237,230,39,30,197,83,1,930,64,35,365,372,13,579,88,702,36,101,901,482,21 }, - { 16,24,0,7,22,18,28,35,14,158,71,2,109,60,1,168,49,154,124,68,10,55,92,118,159,9,5,747,95,105,65,6 }, - { 15,515,700,753,0,23,1,13,341,51,120,77,141,4,137,33,260,82,202,9,180,5,351,2,291,10,11,901,21,115,40,128 }, - { 24,68,35,149,18,16,0,7,9,14,189,108,69,65,67,1,71,2,118,28,140,101,114,336,230,124,175,133,46,55,251,154 }, - { 13,308,197,115,125,9,165,237,391,23,509,569,546,28,49,629,22,338,317,254,749,180,468,159,903,386,217,352,558,39,36,734 }, - { 
64,90,32,217,77,4,141,216,172,312,128,13,86,33,597,147,352,95,5,115,875,22,59,11,102,14,182,437,97,177,874,707 }, - { 1,22,36,0,105,28,2,67,95,49,55,5,239,12,86,9,83,170,312,64,31,21,23,10,164,50,114,159,208,13,7,320 }, - { 9,18,16,0,159,105,101,252,49,310,24,68,35,39,265,272,7,1,60,28,455,890,329,557,118,286,55,137,327,167,5,13 }, - { 129,123,214,249,618,17,5,257,205,184,460,76,2,162,769,245,90,106,128,45,119,1,183,4,3,12,179,64,6,229,99,209 }, - { 51,453,719,457,23,13,730,899,475,386,4,15,11,12,670,196,515,523,961,401,153,3,700,99,753,117,403,32,120,165,57,0 }, - { 173,66,192,204,20,74,104,636,7,43,289,426,825,712,560,214,81,750,65,97,707,0,90,414,64,348,32,500,22,861,95,6 }, - { 13,23,1,51,5,21,141,120,202,15,165,2,515,180,12,125,0,64,82,700,197,269,32,48,260,128,115,93,9,137,33,753 }, - { 200,34,322,78,472,390,27,714,19,14,136,161,453,176,236,444,59,3,62,128,108,57,283,862,73,53,47,17,412,813,4,56 }, - { 33,347,66,204,426,498,56,172,97,95,5,681,546,22,10,0,135,180,4,241,19,174,6,353,263,21,7,370,42,197,27,808 }, - { 131,224,219,187,385,371,258,442,254,737,31,98,836,127,924,944,44,871,908,716,39,827,201,574,116,137,36,1,276,242,578,616 }, - { 1,15,2,180,4,141,13,515,0,5,3,115,700,901,341,23,217,352,753,51,197,77,317,33,365,752,165,21,6,7,269,93 }, - { 370,91,718,74,81,510,397,66,636,240,355,84,138,511,18,278,6,681,701,289,90,5,214,582,64,104,0,643,192,65,750,32 }, - { 15,515,700,753,13,120,1,0,165,2,197,23,260,180,4,82,51,386,8,5,12,10,3,141,351,341,326,9,450,7,64,6 }, - { 32,2,76,5,1,292,72,45,476,214,21,241,29,14,17,48,129,90,179,460,464,123,290,148,519,205,3,263,249,38,710,89 }, - { 13,165,115,17,197,569,23,509,457,45,32,41,106,180,62,38,659,734,155,536,341,629,961,873,587,54,431,37,391,99,405,428 }, - { 68,24,35,16,0,101,9,124,154,71,149,65,18,175,28,118,7,55,302,108,92,14,22,346,1,39,429,252,375,364,10,67 }, - { 0,9,1,68,230,65,35,69,83,23,101,13,141,67,217,352,21,39,16,88,28,124,212,100,115,154,51,64,30,36,10,317 }, - { 
100,265,88,909,410,382,812,593,1,213,321,30,252,230,352,264,9,166,689,39,676,98,21,466,724,639,478,217,13,48,553,101 }, - { 113,61,198,904,43,0,5,37,899,325,20,59,33,523,204,725,817,389,470,329,222,40,174,58,22,453,690,848,122,104,788,105 }, - { 16,24,141,18,7,0,71,140,35,269,75,352,12,9,108,217,5,330,60,64,199,70,22,13,486,246,318,133,65,50,23,498 }, - { 271,167,121,60,18,191,146,199,16,1,446,132,575,212,463,354,126,35,598,566,727,98,107,21,608,955,640,407,5,24,223,68 }, - { 15,515,115,217,700,13,317,753,141,180,33,23,110,120,4,341,82,10,1,260,365,36,64,854,351,21,51,352,137,77,40,0 }, - { 173,7,97,356,43,107,20,387,729,104,426,232,560,595,359,392,414,707,885,81,5,0,66,858,612,49,861,14,22,32,819,230 }, - { 15,515,700,753,13,23,51,82,0,33,165,120,196,4,1,2,197,453,260,351,180,12,40,8,386,110,5,326,9,141,217,457 }, - { 9,100,120,30,77,795,137,82,202,39,264,827,578,127,0,166,373,318,18,326,141,260,1,450,731,31,33,395,217,291,341,254 }, - { 14,2,67,1,24,69,0,28,65,7,46,18,114,108,36,83,38,398,9,16,124,133,103,154,50,55,22,267,29,160,35,547 }, - { 14,7,69,24,66,16,2,267,189,67,71,150,140,97,18,60,172,35,6,1,38,149,388,92,83,135,108,74,462,380,29,36 }, - { 15,515,700,753,1,13,2,0,4,341,5,3,23,365,11,117,180,10,12,33,134,115,77,197,217,165,6,7,317,102,21,9 }, - { 1,22,0,12,5,2,36,28,21,10,86,13,23,49,128,9,95,51,55,96,208,141,48,202,4,137,37,64,105,3,50,7 }, - { 605,630,63,123,736,650,65,108,444,368,561,389,19,25,42,619,122,194,183,27,53,33,84,26,297,813,114,73,256,235,249,216 }, - { 127,39,9,0,31,371,98,254,1,385,395,44,30,836,187,131,100,116,284,578,299,166,28,21,737,16,276,272,23,49,137,935 }, - { 2,61,6,29,45,151,1,655,7,207,32,5,112,425,17,76,833,4,14,64,58,106,119,25,113,128,72,52,70,21,292,790 }, - { 2,5,17,14,3,29,23,27,13,401,46,6,51,58,1,453,45,53,34,52,133,19,236,26,181,114,99,366,151,108,218,38 }, - { 31,22,170,1,301,44,127,98,36,28,55,105,239,0,338,116,512,299,293,125,86,10,242,395,2,13,9,64,841,23,95,685 }, - { 
1,21,2,14,5,32,48,50,38,270,46,76,290,72,45,54,17,0,155,221,263,207,37,281,430,3,89,12,181,408,36,67 }, - { 17,106,119,378,84,240,62,80,383,136,306,3,56,790,742,5,207,504,64,440,32,128,45,2,123,209,14,4,61,57,297,667 }, - { 13,15,1,515,23,2,0,700,4,115,3,51,5,10,753,180,33,341,217,11,165,317,365,197,6,77,40,64,22,9,7,117 }, - { 772,335,96,744,1,367,662,686,652,897,303,264,521,31,225,410,141,520,260,116,64,44,321,98,144,88,919,966,340,269,349,284 }, - { 1,0,5,28,36,2,12,22,83,67,65,50,24,14,9,96,21,218,18,114,48,281,54,10,7,160,181,103,37,23,133,99 }, - { 25,6,145,42,138,81,174,348,525,544,26,74,85,280,287,648,746,91,66,0,29,396,204,64,636,90,122,194,355,104,65,233 }, - { 16,24,0,18,28,158,7,35,49,22,68,159,55,1,14,109,105,2,9,71,65,154,124,95,424,344,60,239,118,577,21,10 }, - { 352,854,699,230,93,689,137,144,217,160,251,36,669,202,351,120,617,855,752,203,332,82,450,180,141,748,831,30,258,201,1,816 }, - { 15,13,515,700,1,2,0,753,23,5,4,3,51,33,10,115,11,317,217,77,180,341,117,165,6,134,197,153,64,9,102,7 }, - { 104,20,43,173,66,319,0,77,202,7,198,5,97,580,355,74,2,204,174,52,712,234,426,155,102,192,32,4,500,337,226,904 }, - { 13,23,15,51,1,515,0,2,5,700,141,4,753,165,115,12,3,21,457,10,180,269,32,6,197,202,9,7,120,11,77,33 }, - { 15,515,700,753,0,1,13,2,23,115,4,317,8,3,5,51,9,341,10,217,22,365,33,457,6,180,77,901,197,120,18,7 }, - { 15,515,700,753,1,0,2,3,4,13,5,141,23,16,82,217,457,10,365,180,9,317,51,21,269,898,64,202,11,12,318,341 }, - { 2,5,1,14,50,38,29,17,114,46,133,3,45,21,58,171,181,36,218,12,6,52,0,48,137,65,361,23,155,4,285,51 }, - { 0,68,9,65,101,124,35,212,16,149,154,100,24,1,114,336,67,589,252,39,71,189,69,562,18,13,30,398,118,88,265,264 }, - { 1,2,14,22,0,7,67,65,28,36,24,46,168,5,86,69,38,16,49,12,289,10,194,50,83,114,95,6,18,23,55,158 }, - { 13,23,15,51,515,0,700,4,1,753,2,10,115,961,457,12,33,11,3,5,197,9,165,77,102,403,453,40,64,22,37,59 }, - { 15,515,700,753,0,1,2,13,23,5,51,901,8,9,180,3,7,82,4,120,12,10,719,341,6,31,141,457,197,22,115,93 }, - { 
100,252,88,101,0,265,9,724,48,1,21,352,213,676,410,382,321,230,30,329,593,909,39,812,553,217,23,689,520,264,166,419 }, - { 2,5,1,58,171,14,46,50,29,52,45,38,186,155,67,54,151,281,334,61,48,96,17,181,103,400,502,227,21,223,12,69 }, - { 23,120,13,1,202,141,51,21,165,128,260,15,5,269,137,64,33,180,82,318,93,197,77,326,515,125,110,700,450,2,32,48 }, - { 341,197,10,901,13,15,8,638,569,515,479,23,180,873,700,165,143,642,0,961,753,951,1,115,509,499,116,12,498,242,82,206 }, - { 15,1,23,13,515,21,120,51,2,141,202,700,5,180,165,0,753,197,12,7,33,260,352,137,269,4,82,128,48,9,110,6 }, - { 2,29,50,58,1,6,5,52,14,262,17,46,27,53,151,34,171,74,324,26,38,309,45,113,19,96,287,396,223,67,73,583 }, - { 13,23,141,51,4,202,0,115,77,2,33,217,5,317,180,64,10,269,3,9,15,21,1,128,102,137,318,11,352,515,22,31 }, - { 1,67,0,24,50,5,14,18,16,69,2,9,103,35,83,12,96,28,54,7,58,223,21,46,281,48,65,181,22,38,36,108 }, - { 13,23,141,51,77,64,202,115,33,102,128,4,0,269,10,21,217,32,180,318,9,137,2,11,22,291,7,177,16,31,165,197 }, - { 317,115,180,365,873,498,217,341,13,752,482,197,569,352,1,901,36,23,457,468,165,346,546,143,509,134,579,876,868,2,332,21 }, - { 184,257,205,229,152,17,57,497,266,432,452,524,5,619,381,32,4,90,2,12,313,128,45,59,245,106,3,471,129,769,339,214 }, - { 13,23,1,0,15,2,4,515,51,3,10,33,5,700,115,180,753,77,11,365,341,217,9,6,197,7,102,165,317,40,22,64 }, - { 626,70,771,687,379,846,767,761,518,878,82,481,31,786,49,591,178,163,407,44,87,13,845,125,590,371,195,120,98,557,937,351 }, - { 264,1,410,909,772,897,686,521,335,478,98,96,691,639,100,44,284,382,31,321,744,88,914,724,662,765,223,9,682,363,0,367 }, - { 13,23,1,2,0,15,51,515,5,10,4,33,115,77,180,700,3,141,217,40,6,753,317,197,64,165,7,11,102,9,341,22 }, - { 141,77,13,64,269,23,115,21,318,217,5,202,102,33,137,2,15,291,177,51,48,180,32,4,515,352,128,7,0,10,96,11 }, - { 13,15,23,515,51,0,700,753,1,2,4,10,33,11,961,453,115,40,457,14,12,3,9,5,165,401,197,77,22,21,64,102 }, - { 
1,22,0,36,2,31,5,12,13,105,28,9,49,86,141,21,23,95,128,55,44,115,170,10,164,98,180,4,137,239,83,51 }, - { 100,101,88,0,252,9,265,30,21,39,759,724,213,329,321,13,419,68,562,382,676,352,694,35,553,410,1,166,909,593,230,23 }, - { 539,88,30,190,321,530,840,144,669,435,957,748,778,100,96,418,203,213,1,131,410,228,466,274,36,382,219,863,613,83,822,352 }, - { 7,97,92,173,298,107,43,314,232,140,16,356,20,387,729,362,126,359,246,14,230,501,426,441,0,5,560,66,104,779,35,60 }, - { 2,14,7,1,58,5,46,16,38,70,75,45,24,155,29,0,21,52,61,163,220,50,69,270,35,48,32,171,18,6,64,54 }, - { 447,411,363,664,647,98,621,1,354,271,223,478,18,777,781,936,360,759,167,132,121,48,21,156,9,195,118,293,23,691,13,264 }, - { 2,1,14,65,36,67,0,7,46,22,69,5,38,24,28,6,83,29,86,114,168,50,124,208,12,18,108,10,194,484,103,16 }, - { 421,386,51,791,730,958,165,801,23,453,697,403,615,13,221,523,24,899,401,326,551,670,576,102,18,33,125,77,566,115,203,197 }, - { 104,319,422,945,0,81,20,43,715,32,784,693,879,7,397,74,306,207,52,681,671,2,61,173,6,636,904,95,887,5,18,192 }, - { 2,29,1,46,14,52,70,262,6,26,50,67,75,96,309,38,103,112,58,19,5,163,145,83,74,220,223,357,24,69,331,25 }, - { 786,929,590,771,687,626,941,178,465,259,70,5,13,21,35,534,107,518,132,49,878,48,146,121,379,279,31,767,147,195,108,125 }, - { 5,2,186,45,17,29,48,50,14,61,46,155,400,1,227,171,52,58,38,54,430,209,80,281,3,106,536,311,181,243,21,502 }, - { 0,32,64,95,817,494,342,403,207,202,194,389,453,365,312,180,316,5,690,237,848,577,450,61,102,523,475,289,49,241,65,482 }, - { 23,13,51,15,202,515,1,0,2,12,115,4,700,5,165,753,77,457,21,141,128,10,3,64,403,32,197,318,9,11,33,117 }, - { 119,2,6,76,5,17,45,292,306,240,32,1,19,84,64,61,4,209,710,80,26,0,106,27,214,25,128,129,29,179,3,113 }, - { 15,515,700,1,753,0,13,23,180,120,51,2,5,33,165,197,9,450,7,260,115,523,4,12,202,141,82,77,21,102,8,6 }, - { 6,138,74,280,222,85,66,226,25,42,87,204,64,337,29,135,95,174,235,26,145,65,19,32,792,294,112,52,256,2,5,22 }, - { 
39,9,0,101,333,158,49,252,310,254,272,68,16,18,159,286,344,455,30,109,627,327,24,105,419,100,364,22,35,1,329,709 }, - { 21,5,32,14,2,1,38,72,76,54,17,3,48,221,270,0,45,46,12,181,37,89,36,50,540,290,430,10,4,741,99,23 }, - { 120,137,202,269,141,260,318,450,922,494,77,291,82,15,5,351,128,1,515,326,64,854,700,352,342,21,753,678,349,32,523,90 }, - { 16,24,68,35,71,18,149,118,191,167,9,0,212,124,65,246,7,67,140,189,399,101,133,60,1,108,267,114,69,92,695,154 }, - { 28,0,9,67,1,22,109,36,55,65,194,114,39,83,49,69,2,35,103,50,158,208,86,420,168,289,505,24,7,185,5,323 }, - { 147,325,198,427,142,178,202,798,5,376,20,318,259,43,120,450,77,234,534,904,470,465,878,725,329,14,315,0,260,858,70,61 }, - { 28,0,65,9,109,1,55,67,35,22,24,39,289,7,175,14,114,2,158,124,420,194,68,16,336,36,49,69,168,570,154,505 }, - { 141,180,13,115,1,23,4,269,2,202,0,317,217,51,15,5,21,352,77,318,3,752,197,10,165,365,137,341,9,515,33,64 }, - { 15,515,700,0,753,1,2,13,23,9,51,5,4,901,33,7,8,3,12,93,180,120,197,6,82,341,10,141,22,260,457,115 }, - { 6,74,66,1,25,75,324,380,278,26,138,85,135,500,87,42,220,841,97,350,29,19,70,226,38,21,52,606,235,889,2,14 }, - { 13,23,51,12,4,15,453,0,457,1,403,165,115,3,11,2,64,5,401,10,515,37,202,33,40,32,99,475,197,700,308,17 }, - { 788,180,5,83,693,319,314,4,32,21,17,11,817,3,510,498,33,12,24,104,814,120,64,117,306,804,523,450,288,160,102,43 }, - { 15,1,515,13,2,700,23,0,4,753,3,341,5,51,33,11,10,6,77,7,115,102,180,165,141,9,197,217,901,40,12,64 }, - { 101,18,9,167,520,16,0,118,60,212,604,364,694,24,55,252,68,917,264,35,1,121,146,363,39,100,806,5,21,166,191,28 }, - { 13,23,0,33,51,141,77,4,64,2,115,217,9,102,7,202,21,10,180,3,15,128,5,269,6,32,11,16,165,352,22,317 }, - { 66,135,97,74,172,6,278,7,204,324,138,174,29,85,2,87,25,140,92,192,52,38,802,69,448,500,808,620,22,1,280,232 }, - { 2,24,69,6,97,7,1,0,14,298,423,66,67,29,150,25,189,267,124,74,607,18,36,81,172,33,83,38,52,273,71,809 }, - { 
0,9,158,39,68,49,109,16,24,333,35,344,101,22,159,254,272,30,124,65,28,18,793,154,310,252,327,105,627,419,286,55 }, - { 15,515,700,10,753,33,77,180,4,341,1,197,13,115,365,23,901,317,5,102,11,217,165,117,141,40,2,3,253,21,134,55 }, - { 100,166,382,478,265,264,88,39,98,1,404,274,9,593,724,921,639,438,363,682,411,31,30,812,96,447,821,905,252,0,223,435 }, - { 19,283,436,53,297,26,813,432,27,128,42,25,390,503,122,736,73,123,605,63,389,529,630,250,690,65,381,444,6,269,108,216 }, - { 33,202,13,128,494,0,51,141,269,1,4,2,102,180,15,137,65,95,6,450,77,40,117,59,457,36,196,817,134,86,49,515 }, - { 18,16,60,68,101,167,191,118,35,121,9,212,55,0,126,1,24,647,199,146,520,107,628,621,363,71,21,28,346,92,806,727 }, - { 352,230,217,531,160,93,36,669,748,854,689,258,137,871,728,699,752,251,574,202,373,351,228,120,717,260,144,219,268,82,816,1 }, - { 15,515,700,753,13,23,0,51,8,1,4,82,165,77,110,33,10,180,5,202,11,22,120,12,9,197,115,93,403,141,40,351 }, - { 0,9,101,217,35,88,352,100,39,175,30,68,562,752,13,317,252,115,180,197,64,1,83,141,65,213,165,230,194,36,28,265 }, - { 32,64,5,470,288,90,21,147,0,95,356,22,20,519,835,312,819,18,247,182,11,97,13,4,387,49,43,298,316,48,107,7 }, - { 16,14,24,1,7,2,35,0,5,50,18,69,46,12,58,75,9,67,70,163,21,54,38,48,223,502,281,37,140,60,28,10 }, - { 18,265,9,252,39,195,354,411,1,16,132,101,121,682,167,203,5,363,146,593,35,333,21,271,60,13,100,0,156,327,7,520 }, - { 4,13,1,115,141,23,2,180,5,0,51,3,217,202,77,15,33,269,341,318,317,21,165,515,11,10,197,365,9,137,64,352 }, - { 7,24,16,14,71,35,18,92,140,189,108,149,68,60,69,150,2,230,97,66,0,458,67,1,65,251,38,314,388,267,36,46 }, - { 1,22,31,36,0,2,44,5,141,105,180,170,12,64,13,98,86,55,23,21,28,164,115,127,10,125,128,4,9,239,352,197 }, - { 66,7,97,2,192,20,52,43,135,74,560,107,104,0,750,147,414,29,580,173,324,376,226,194,77,174,204,38,356,64,16,470 }, - { 0,101,9,68,35,124,24,65,39,16,252,100,154,166,28,364,149,694,30,88,55,346,1,419,71,439,265,289,22,21,175,158 }, - { 
39,9,100,30,127,0,737,856,31,836,827,254,98,931,166,88,93,1,44,190,131,228,120,395,625,385,863,264,219,373,110,28 }, - { 5,17,2,3,21,45,14,155,48,32,38,1,328,181,186,46,23,51,12,61,227,29,106,54,99,133,62,832,13,37,514,543 }, - { 131,613,30,224,228,716,274,100,827,406,219,856,39,190,31,88,1,166,9,44,829,863,931,93,0,187,625,924,127,98,137,254 }, - { 352,217,64,141,752,269,180,864,437,372,954,115,498,177,77,349,317,318,579,291,947,197,247,0,23,717,237,304,128,457,776,678 }, - { 141,217,180,317,352,115,15,341,1,23,13,365,515,752,2,64,0,5,498,700,372,165,51,237,753,77,244,197,137,4,21,253 }, - { 5,2,17,14,1,45,3,38,21,29,181,58,46,48,50,133,114,171,61,155,32,6,186,281,361,12,36,54,4,13,52,514 }, - { 6,2,119,25,790,4,45,483,655,113,1,29,76,26,32,19,887,17,128,0,292,833,59,61,106,64,77,814,14,151,84,42 }, - { 13,23,2,0,51,4,1,115,141,3,5,180,33,217,77,9,202,11,7,15,10,6,317,64,21,197,515,165,102,128,22,269 }, - { 13,23,51,202,21,5,1,15,141,165,120,2,115,12,32,0,515,128,318,64,125,700,4,403,197,453,180,457,3,7,10,6 }, - { 9,39,31,30,0,127,1,44,100,131,98,187,385,276,88,442,219,908,254,116,49,166,935,28,201,36,141,827,137,299,284,21 }, - { 22,49,28,109,9,185,105,95,1,131,159,272,36,67,86,254,39,55,35,0,505,31,83,169,208,327,286,98,168,535,312,708 }, - { 5,1,2,21,0,12,48,22,10,14,36,3,32,17,23,54,86,38,4,51,13,37,137,50,65,281,114,45,28,99,58,202 }, - { 141,269,352,82,217,351,180,854,372,922,752,1,15,260,317,318,515,202,64,700,120,349,954,753,77,35,67,717,898,137,365,115 }, - { 144,203,613,418,326,406,96,669,137,679,1,228,494,822,840,317,36,83,855,160,817,859,856,816,217,831,345,93,876,77,44,251 }, - { 15,13,515,23,700,753,51,1,33,0,202,21,2,5,180,141,120,165,217,82,12,117,4,352,269,197,115,32,3,9,134,260 }, - { 13,23,115,1,0,51,4,77,2,33,15,141,10,5,341,180,515,3,217,202,9,365,317,64,700,102,11,165,197,22,753,7 }, - { 531,943,373,160,728,93,206,260,261,559,964,269,717,535,332,384,365,295,110,533,141,10,180,352,244,137,120,55,959,564,36,253 }, - { 
1,31,36,170,22,55,44,10,86,64,127,0,2,98,301,164,740,338,237,143,5,125,116,13,242,141,299,180,23,169,105,12 }, - { 13,23,141,77,51,4,64,32,33,202,115,269,102,128,21,0,177,180,318,90,40,10,7,5,137,15,217,352,9,291,59,22 }, - { 202,120,260,318,77,15,450,269,1,82,33,23,141,13,51,515,351,128,700,5,64,326,137,21,102,110,753,494,93,523,817,165 }, - { 523,899,102,33,730,15,23,403,719,117,153,13,515,51,475,4,700,5,453,817,196,753,494,40,202,120,1,2,450,457,17,421 }, - { 202,403,494,450,120,817,523,475,318,453,33,402,128,77,13,51,260,576,342,102,15,23,515,4,700,5,82,753,326,210,137,615 }, - { 2,29,112,66,7,52,70,151,58,87,135,5,74,226,307,6,14,186,1,45,549,172,644,25,113,287,46,155,334,64,294,97 }, - { 1,77,349,291,260,120,652,102,5,39,64,269,9,33,340,342,13,98,888,698,23,296,100,318,51,202,87,137,638,128,50,850 }, - { 1,13,15,2,0,4,23,515,5,141,180,3,700,341,115,51,753,269,77,901,197,352,217,33,21,11,365,6,165,202,7,317 }, - { 7,14,16,2,46,5,70,58,1,38,24,35,92,163,0,75,21,18,50,54,140,12,87,220,155,69,171,23,60,9,13,307 }, - { 2,29,66,226,135,7,87,74,52,278,6,75,222,220,294,70,97,1,145,25,172,262,324,38,69,112,331,92,5,14,140,26 }, - { 15,515,700,753,33,77,117,4,1,102,134,40,153,11,13,196,217,21,5,51,23,115,32,3,2,202,141,137,128,291,48,177 }, - { 15,217,82,515,351,141,317,1,13,700,260,77,110,120,115,854,23,753,180,51,21,36,137,922,5,64,365,352,291,202,93,341 }, - { 31,190,30,373,120,110,863,88,44,127,908,856,260,318,82,98,93,187,836,717,935,39,442,131,141,254,228,219,1,968,77,116 }, - { 23,13,308,9,165,115,51,21,401,125,49,39,197,391,159,254,217,743,28,438,773,629,558,386,341,95,32,317,876,679,109,166 }, - { 49,5,43,165,7,0,21,104,125,22,173,422,64,13,623,102,20,18,314,95,91,141,23,31,193,51,391,900,779,558,92,232 }, - { 1,4,13,2,15,0,23,515,77,3,341,33,5,700,115,51,202,753,141,180,11,10,102,217,6,901,40,7,197,318,317,365 }, - { 93,843,295,120,36,160,206,261,10,137,567,110,384,141,943,268,201,332,258,55,1,180,64,116,44,144,699,203,282,31,260,373 }, - { 
81,7,192,426,43,173,172,104,879,91,5,712,715,526,6,97,568,95,448,66,33,861,560,32,49,20,0,636,232,825,2,22 }, - { 475,403,51,453,33,102,13,23,494,202,0,196,15,77,153,18,4,117,515,450,318,22,730,128,700,421,65,753,269,402,134,817 }, - { 141,269,260,318,202,120,352,349,82,351,1,5,854,137,64,291,15,922,180,851,32,77,515,372,21,700,7,217,13,947,33,753 }, - { 15,515,700,753,1,13,0,2,4,23,3,5,180,115,197,12,51,165,217,10,961,9,6,141,352,21,8,7,33,77,457,120 }, - { 39,166,9,30,0,101,274,404,252,333,190,100,158,438,310,88,68,265,656,21,1,530,329,344,49,539,625,254,13,131,48,419 }, - { 0,1,28,9,22,12,65,83,67,36,5,2,50,55,96,109,16,13,24,23,21,238,49,18,285,160,128,39,69,114,7,323 }, - { 15,515,700,1,0,753,23,2,13,51,5,180,115,6,3,9,197,12,457,120,7,165,901,82,4,21,8,141,31,33,719,341 }, - { 9,39,30,0,28,166,22,49,180,1,352,35,317,158,88,141,498,131,115,345,752,128,228,217,100,83,219,930,13,251,365,36 }, - { 5,61,45,2,80,29,311,209,6,17,58,1,151,106,454,667,243,70,52,496,287,592,255,738,64,74,483,14,27,32,112,19 }, - { 31,125,22,44,299,456,685,242,599,116,170,28,0,1,492,393,506,144,558,10,268,301,239,23,13,36,963,367,55,206,105,95 }, - { 187,258,926,574,839,93,228,860,406,219,871,160,137,531,224,116,120,902,669,201,36,131,44,144,843,533,318,384,442,1,434,268 }, - { 7,107,75,16,87,9,64,177,24,18,291,77,349,141,60,232,23,0,51,269,132,14,5,21,70,32,678,112,126,121,71,947 }, - { 15,515,700,753,1,0,2,13,5,3,23,180,4,115,901,51,6,8,961,9,7,10,12,82,197,22,141,341,33,120,365,457 }, - { 13,23,51,1,5,202,2,12,15,21,165,141,0,115,3,4,32,515,197,10,180,318,128,120,64,700,6,7,403,269,457,137 }, - { 1,2,0,77,64,3,141,13,33,15,23,10,6,102,5,515,180,4,117,7,700,165,11,217,269,40,753,115,128,17,197,134 }, - { 345,531,332,269,260,317,717,752,373,351,180,352,728,82,10,365,160,533,217,143,498,251,244,93,341,901,36,1,141,898,55,864 }, - { 16,7,33,189,92,77,388,60,140,35,102,24,14,1,230,21,150,117,733,314,18,915,71,13,108,134,5,64,69,2,98,22 }, - { 
142,202,234,178,5,786,77,49,70,0,416,450,639,878,1,48,21,929,147,259,315,455,198,120,12,481,163,113,846,329,318,22 }, - { 81,715,192,0,173,712,681,104,636,91,74,20,750,370,7,718,95,879,22,43,825,560,422,64,207,49,172,18,397,10,426,319 }, - { 13,23,15,51,515,0,700,753,4,1,961,2,10,115,457,11,33,453,3,5,9,40,12,197,165,77,401,475,64,102,22,569 }, - { 64,297,5,445,95,61,250,311,80,34,17,312,45,2,86,472,58,14,180,53,22,151,869,738,247,237,29,1,128,165,21,288 }, - { 16,24,18,71,7,35,118,92,14,154,60,68,0,149,28,302,124,150,55,175,2,9,97,1,429,20,108,273,22,65,43,126 }, - { 195,360,156,771,132,163,626,687,591,371,883,146,121,846,70,586,379,13,293,98,407,48,761,296,354,18,31,1,55,49,21,105 }, - { 202,13,77,23,318,33,51,0,4,141,5,21,217,32,291,102,64,128,15,10,9,494,269,137,515,403,1,31,117,700,120,317 }, - { 7,192,97,81,172,66,426,173,43,715,712,232,861,879,104,330,568,298,74,893,885,526,387,825,92,140,91,14,636,6,5,448 }, - { 2,1,14,6,67,7,65,69,24,36,66,124,108,83,38,29,22,86,0,18,484,5,28,46,12,10,25,302,150,16,650,74 }, - { 33,77,13,202,102,4,0,23,128,51,141,64,318,22,403,269,137,10,15,40,494,117,32,59,11,153,1,21,177,196,515,115 }, - { 269,141,318,77,349,291,217,202,33,15,372,304,515,22,102,177,351,700,352,120,5,137,10,317,260,753,64,851,854,403,49,21 }, - { 6,74,66,85,138,25,87,42,135,26,226,222,280,29,75,500,220,278,792,70,19,2,1,294,204,64,32,145,853,112,52,174 }, - { 9,0,105,39,16,18,1,101,272,31,127,98,24,518,333,252,310,28,68,737,846,371,158,916,938,49,30,7,286,35,301,455 }, - { 2,1,14,67,24,46,83,108,69,29,38,103,114,36,6,133,18,0,28,7,65,52,236,75,50,398,5,309,135,16,278,160 }, - { 22,1,28,105,49,95,0,2,67,55,36,239,168,159,65,35,14,170,320,164,9,7,10,5,114,12,83,64,194,109,24,301 }, - { 7,66,172,97,92,140,232,568,298,14,192,314,16,380,135,324,2,330,74,38,357,448,126,69,35,5,107,6,387,60,204,572 }, - { 2,29,1,14,5,6,46,133,114,50,52,26,218,108,19,13,366,236,27,45,70,17,58,23,86,51,137,65,112,38,25,12 }, - { 
6,1,74,25,2,26,29,66,42,19,75,14,388,67,108,70,52,85,103,65,38,138,357,133,114,594,324,516,603,96,309,69 }, - { 22,9,28,1,36,49,109,105,86,95,131,31,169,39,0,141,272,159,44,55,98,180,13,30,185,115,83,128,352,137,64,208 }, - { 203,822,326,23,77,859,403,494,576,39,473,182,33,1,691,100,18,217,13,817,411,447,363,102,93,966,96,478,291,704,310,120 }, - { 15,515,700,753,33,77,117,4,102,134,115,153,13,1,40,217,11,196,341,2,5,3,23,317,365,0,21,291,32,51,12,569 }, - { 15,515,700,753,13,0,23,1,8,82,51,165,197,120,180,2,9,33,4,110,5,12,10,260,351,386,141,7,457,475,93,901 }, - { 1,23,13,15,51,0,21,2,515,5,141,180,120,165,700,202,197,4,753,12,33,9,7,82,115,93,3,352,260,6,110,48 }, - { 15,515,700,753,0,1,2,5,3,4,8,13,180,341,10,23,7,6,9,51,77,197,961,115,165,82,120,31,22,202,457,217 }, - { 559,661,922,564,141,533,10,317,373,110,143,269,244,260,332,261,93,642,752,295,351,876,531,843,180,206,728,384,352,1,434,120 }, - { 15,515,1,0,700,2,13,23,753,5,51,180,3,165,12,6,197,115,4,9,7,21,719,8,457,82,141,120,33,22,901,10 }, - { 198,234,0,325,5,77,202,416,20,147,32,43,639,315,49,61,450,455,142,21,113,230,22,318,725,342,207,13,95,904,494,10 }, - { 447,264,363,9,411,676,682,1,156,664,821,478,166,354,812,39,100,905,382,897,98,18,759,404,31,101,724,5,265,223,88,13 }, - { 24,14,69,16,35,18,2,7,108,189,71,67,267,149,1,46,68,83,38,140,0,236,251,9,388,60,133,103,65,28,29,50 }, - { 16,7,35,20,14,18,109,2,43,120,107,60,1,121,326,907,553,77,13,147,23,82,68,260,0,403,5,24,202,126,265,199 }, - { 30,131,187,276,31,44,613,442,39,9,190,228,1,839,116,935,908,219,127,88,244,224,110,137,93,201,98,141,36,567,0,856 }, - { 98,223,1,393,812,265,100,421,593,834,697,48,51,410,791,382,21,88,31,284,9,125,96,293,230,23,213,217,656,689,541,5 }, - { 98,51,127,219,616,258,105,293,395,421,924,512,31,308,23,201,116,44,301,272,763,276,125,13,453,170,401,295,261,944,115,567 }, - { 253,110,951,352,811,206,332,180,141,244,282,10,854,417,642,638,559,752,143,911,260,55,93,533,499,498,661,120,351,959,564,341 }, - { 
49,9,159,254,272,158,0,131,28,39,627,105,327,286,22,518,688,578,68,347,374,101,224,424,95,35,219,24,16,364,65,344 }, - { 105,22,131,272,98,286,327,109,374,239,28,95,320,219,9,224,55,127,187,36,578,169,64,185,538,1,159,10,371,634,49,616 }, - { 691,478,340,1,658,914,724,363,744,698,156,772,411,296,682,447,9,284,335,98,264,303,909,21,354,410,225,13,664,686,88,919 }, - { 16,24,35,18,71,7,140,108,189,267,92,60,14,230,68,69,9,1,149,46,246,191,388,167,2,0,118,236,133,21,674,5 }, - { 9,0,127,31,98,371,395,39,737,49,1,44,385,272,512,28,293,242,836,761,254,299,101,16,187,22,116,158,159,131,18,21 }, - { 16,18,68,0,60,35,9,101,252,28,118,24,419,55,7,109,604,71,39,121,22,364,14,158,191,167,925,126,329,21,92,49 }, - { 116,268,203,93,206,692,551,31,417,940,499,8,473,44,202,523,959,0,120,137,559,22,450,403,576,10,728,299,13,326,51,1 }, - { 225,459,744,1,919,914,691,330,622,21,141,223,5,284,934,335,88,538,340,82,385,839,363,120,478,98,48,30,64,32,686,166 }, - { 5,1,2,0,14,36,21,281,12,48,50,67,22,28,54,83,24,218,38,10,181,9,32,18,65,58,45,114,430,17,99,37 }, - { 137,450,202,704,120,260,326,318,968,269,851,403,291,77,23,141,182,310,494,373,351,457,82,890,349,110,60,128,817,678,105,96 }, - { 15,515,700,753,4,33,13,23,77,5,40,11,102,93,1,21,110,51,82,117,141,2,10,8,32,64,120,31,202,3,217,115 }, - { 15,13,1,23,515,0,51,2,700,5,753,21,180,141,165,3,12,115,197,4,7,6,457,9,352,202,33,8,719,120,77,341 }, - { 219,127,258,98,276,201,131,395,944,293,116,284,567,31,242,105,137,935,295,44,403,860,51,224,576,456,9,371,578,475,202,512 }, - { 16,7,18,35,60,0,14,20,118,28,68,22,2,24,1,92,158,107,5,49,154,126,109,12,43,10,55,6,677,71,21,168 }, - { 93,728,531,160,559,373,574,120,295,860,533,269,717,260,926,902,258,318,36,201,261,434,851,137,617,141,187,352,843,384,332,251 }, - { 28,0,1,9,22,109,83,39,49,12,36,67,55,5,96,2,128,30,158,69,21,23,160,208,35,13,65,323,50,141,194,238 }, - { 23,13,51,1,5,15,141,21,0,2,165,515,202,700,12,197,180,120,32,115,4,753,64,9,7,269,6,3,125,386,48,453 }, - { 
5,21,13,49,14,20,7,23,43,32,1,0,652,48,713,22,38,2,16,132,955,107,12,279,24,888,197,640,70,303,18,638 }, - { 9,0,28,39,1,30,35,101,22,67,83,141,49,175,36,68,55,88,13,251,10,69,23,158,180,115,64,100,217,65,345,166 }, - { 260,120,82,269,5,450,351,1,202,141,854,13,77,922,32,33,137,4,23,125,291,21,15,515,165,349,177,700,318,326,180,753 }, - { 121,16,18,35,363,101,60,20,107,14,68,259,621,55,604,43,7,252,9,364,126,0,167,191,5,407,132,28,199,419,146,10 }, - { 13,23,51,1,15,0,2,141,5,515,12,21,700,115,165,180,4,753,3,197,202,32,9,120,7,8,6,11,37,10,457,269 }, - { 201,144,206,443,418,203,435,96,335,459,187,1,88,332,330,321,269,934,30,372,822,521,268,326,44,523,382,141,410,264,494,473 }, - { 31,44,276,201,116,131,284,662,567,144,9,489,98,295,268,434,0,30,137,39,93,1,187,22,219,918,110,299,141,36,224,384 }, - { 520,478,664,1,264,604,9,167,777,759,411,0,806,724,48,21,101,68,647,936,363,223,118,682,410,18,100,16,252,98,265,13 }, - { 23,70,21,87,60,75,120,182,163,379,92,18,7,937,71,121,446,132,24,98,931,126,107,77,795,195,115,44,411,146,51,850 }, - { 39,9,0,737,127,31,846,98,1,827,105,310,371,30,254,100,44,18,395,242,272,101,385,916,836,16,265,131,938,93,166,557 }, - { 279,20,43,126,107,7,92,16,356,362,60,595,246,359,598,35,0,683,939,653,121,97,125,441,399,392,150,199,48,230,14,649 }, - { 206,417,93,959,499,728,8,559,120,473,137,141,10,564,31,260,44,450,203,341,253,244,373,116,143,638,268,180,352,110,318,940 }, - { 259,465,147,132,590,687,534,199,581,146,941,427,107,640,279,178,121,5,195,150,522,955,198,35,786,929,798,142,1,21,325,626 }, - { 523,15,120,450,202,515,403,51,817,700,13,753,23,457,33,899,128,64,730,102,494,342,115,719,453,196,49,99,318,421,308,5 }, - { 141,559,10,244,365,564,661,180,253,143,752,110,55,317,533,341,901,93,373,206,535,160,82,922,260,36,531,964,352,332,261,197 }, - { 219,258,98,127,276,964,943,137,843,535,201,935,131,860,261,295,284,567,206,44,116,31,253,492,203,332,160,615,36,93,55,692 }, - { 
0,319,422,207,945,693,577,887,32,804,95,344,104,904,61,20,5,43,7,725,113,510,306,102,49,263,153,426,33,83,22,9 }, - { 1,22,0,5,12,2,36,21,28,86,49,105,9,10,23,13,141,95,31,55,128,37,51,4,83,202,3,64,96,7,32,44 }, - { 15,515,13,700,1,0,753,2,23,3,4,5,51,10,115,197,6,33,12,9,165,7,8,77,11,961,180,269,141,22,120,457 }, - { 15,1,23,120,77,13,515,51,141,202,700,180,110,137,260,753,326,5,128,102,0,21,2,165,269,33,197,450,318,217,93,115 }, - { 15,515,1,700,0,753,13,2,23,180,51,5,120,4,9,115,197,12,7,165,21,33,6,82,3,8,523,901,31,141,457,260 }, - { 16,18,24,7,92,35,60,75,9,13,71,14,0,108,50,21,126,121,1,140,23,5,132,146,2,12,128,10,64,141,70,87 }, - { 180,341,901,15,515,1,365,700,0,2,197,753,115,4,10,13,752,5,3,8,165,317,141,23,143,873,44,31,569,55,93,6 }, - { 9,0,175,35,101,28,39,67,68,1,65,83,30,69,364,336,22,114,55,124,194,158,100,289,252,166,64,345,103,36,50,88 }, - { 64,165,180,197,115,247,217,237,21,13,32,316,22,141,352,72,288,304,95,225,76,391,386,16,468,90,49,35,365,640,372,23 }, - { 15,515,700,13,753,1,0,2,23,4,5,3,115,51,141,197,12,10,180,961,7,9,21,33,217,6,8,165,457,11,77,341 }, - { 132,121,199,146,60,279,493,640,407,598,126,195,534,581,955,590,107,5,150,35,522,49,259,16,18,360,156,0,147,362,21,167 }, - { 0,1,28,9,22,5,36,12,65,24,67,96,2,83,18,50,114,55,21,16,7,10,23,14,13,160,137,51,48,218,103,69 }, - { 93,120,957,77,30,968,459,110,137,160,613,102,202,352,373,141,31,372,217,330,190,318,269,260,203,44,28,473,228,177,863,704 }, - { 15,515,700,0,753,1,13,23,2,51,5,9,120,82,4,7,901,197,10,8,260,180,341,12,33,6,3,523,165,102,115,141 }, - { 206,417,8,141,499,44,244,93,31,10,137,253,559,116,728,144,120,564,269,638,203,352,143,260,341,752,268,717,951,180,160,110 }, - { 530,254,228,1,96,21,406,39,827,31,669,840,613,829,137,679,166,98,23,51,960,438,131,93,48,224,219,317,310,36,876,190 }, - { 15,515,700,753,13,457,0,197,719,1,165,82,23,8,120,730,2,10,12,180,134,5,9,141,260,4,351,51,115,3,341,899 }, - { 
0,16,68,9,24,28,18,35,252,109,39,419,124,158,154,55,101,71,22,118,60,7,49,65,333,14,1,10,329,364,677,346 }, - { 1,15,13,23,515,51,120,0,700,180,2,165,5,753,141,197,21,33,202,102,260,4,9,12,7,326,137,450,115,6,82,110 }, - { 535,253,352,564,110,365,82,180,341,10,854,533,55,898,244,901,873,141,752,143,642,559,498,317,36,951,115,964,638,282,661,197 }, - { 31,44,125,338,116,64,242,36,1,10,55,22,456,237,180,13,299,164,506,86,23,165,558,143,0,762,492,479,844,546,93,8 }, - { 13,23,4,1,202,2,0,51,115,77,141,180,5,15,217,3,33,11,515,317,9,10,102,21,700,341,365,318,269,64,32,128 }, - { 9,39,0,166,68,101,28,364,30,158,562,35,175,65,333,154,49,404,706,124,21,252,274,168,190,289,100,570,16,1,310,346 }, - { 15,515,700,753,341,13,0,23,1,33,141,4,260,82,77,51,351,180,9,5,115,137,10,217,11,120,102,40,349,269,202,854 } -#else - #include "rgbcx_table4.h" -#endif - }; - - static uint8_t g_best_total_orderings3[NUM_UNIQUE_TOTAL_ORDERINGS3][32] = - { - { 12,1,3,5,27,2,4,38,8,7,16,18,6,10,41,79,40,23,46,9,20,88,22,37,14,19,24,126,99,119,35,11 }, - { 7,64,116,14,94,30,8,42,1,108,47,55,137,10,134,95,96,115,69,32,63,29,90,113,11,148,16,103,19,9,34,25 }, - { 12,1,0,5,3,7,4,27,8,6,38,40,41,16,18,46,9,10,20,23,79,62,14,22,88,99,37,126,92,19,120,11 }, - { 16,88,27,18,46,48,126,107,79,19,59,38,37,65,23,66,0,2,3,43,12,151,28,25,5,87,72,40,1,20,52,92 }, - { 79,48,88,16,27,65,18,38,46,19,37,4,72,33,126,41,52,0,12,92,5,1,2,107,3,77,23,91,43,51,22,74 }, - { 1,8,41,122,10,22,2,0,87,24,37,120,38,7,39,4,5,3,9,92,62,59,23,16,104,11,27,79,19,26,25,32 }, - { 2,76,99,28,40,86,93,21,138,60,6,0,17,128,145,119,98,144,141,82,147,54,67,75,5,12,27,132,146,1,38,14 }, - { 47,7,64,90,1,118,116,85,57,14,30,94,50,45,137,134,8,42,69,139,55,68,58,108,95,29,10,115,0,32,2,11 }, - { 49,8,10,30,124,11,32,113,130,58,125,9,100,53,104,115,131,103,24,7,1,39,45,36,139,0,137,22,90,44,114,105 }, - { 9,38,72,125,49,41,84,11,13,5,27,0,16,92,8,2,65,105,10,18,48,29,127,131,36,14,1,46,111,79,130,12 }, - { 
130,8,10,100,104,131,49,32,53,39,30,36,113,24,11,22,124,44,83,58,7,103,1,4,9,125,5,0,91,33,115,74 }, - { 114,11,58,8,120,49,9,124,142,111,41,30,10,0,97,130,62,84,38,5,72,125,92,127,100,27,139,113,13,132,32,1 }, - { 60,46,28,27,40,20,0,17,18,2,126,16,6,38,86,23,79,54,1,93,5,88,41,14,21,111,7,48,3,84,72,62 }, - { 72,92,38,65,84,48,41,79,27,16,29,111,88,5,18,46,1,0,152,14,37,19,77,42,132,7,22,13,119,56,12,2 }, - { 7,55,1,95,29,56,64,116,143,8,14,30,47,94,152,90,65,67,10,133,42,72,146,84,16,48,6,0,25,108,77,21 }, - { 27,23,20,5,0,79,38,2,3,1,59,46,4,41,33,86,37,87,88,92,7,126,43,8,22,152,151,150,149,148,147,146 }, - { 12,0,1,2,7,6,3,5,28,4,8,14,60,40,17,19,21,86,126,93,10,18,9,29,48,99,65,25,84,119,72,41 }, - { 60,40,99,2,54,12,0,1,19,28,98,93,6,138,21,5,27,17,151,14,76,46,16,18,38,29,86,144,107,7,25,41 }, - { 12,0,1,2,3,5,6,7,4,28,8,60,14,40,16,17,21,10,19,9,86,38,126,41,93,27,29,48,62,84,79,99 }, - { 0,1,2,10,5,8,3,25,4,29,32,34,63,7,77,26,16,48,65,56,14,22,129,103,72,24,18,152,140,53,96,42 }, - { 46,126,18,54,12,16,1,0,5,2,27,98,20,23,6,3,88,48,28,7,19,8,4,60,151,38,37,21,79,14,65,40 }, - { 76,6,141,86,119,2,138,67,28,145,0,93,17,1,40,60,146,99,147,14,21,144,132,7,5,29,55,27,16,75,19,12 }, - { 71,5,51,39,22,80,0,43,10,122,8,62,41,24,104,87,35,37,2,91,33,120,36,38,1,131,9,100,130,66,3,4 }, - { 126,18,46,27,20,16,88,23,12,79,54,59,48,0,73,1,37,151,5,19,28,38,2,66,60,3,65,98,14,26,6,43 }, - { 22,10,8,5,0,71,35,80,104,39,24,51,100,1,62,32,2,130,11,41,7,9,53,43,49,83,122,120,30,44,37,38 }, - { 1,34,14,129,53,63,42,26,121,148,7,44,96,10,0,24,100,32,64,116,140,22,5,19,29,103,135,108,8,61,39,83 }, - { 1,7,34,63,44,25,135,14,24,108,22,0,83,94,5,129,35,101,47,121,2,19,42,53,6,110,103,8,148,10,16,123 }, - { 12,28,16,60,18,1,6,21,14,0,86,19,2,48,93,17,38,29,7,5,65,126,46,72,41,79,84,119,40,56,54,88 }, - { 0,2,12,27,5,46,38,40,41,79,88,99,3,23,1,62,20,4,22,37,92,35,18,8,16,24,10,60,7,120,98,54 }, - { 
1,7,14,56,8,0,84,67,10,2,133,72,42,111,5,30,21,4,9,3,25,94,16,116,47,11,65,18,132,90,55,64 }, - { 30,8,124,139,45,11,58,90,113,137,7,115,10,32,1,49,94,85,9,47,108,103,0,97,63,14,50,114,53,106,100,25 }, - { 65,38,48,27,16,79,72,18,88,19,46,77,84,92,37,41,0,29,1,14,12,111,2,5,31,36,87,74,105,40,28,51 }, - { 10,8,30,113,130,100,53,32,115,103,104,7,1,121,39,49,131,44,24,36,63,137,34,45,22,90,108,83,26,11,94,139 }, - { 51,52,43,33,5,74,16,37,71,91,38,3,36,87,48,22,4,0,122,41,39,18,66,27,79,24,65,88,59,23,62,92 }, - { 1,7,63,53,108,121,94,44,103,100,14,10,129,47,32,26,24,25,148,42,135,22,0,61,83,8,39,104,5,64,115,34 }, - { 1,8,10,7,5,0,80,32,62,2,24,44,53,83,9,41,30,22,100,11,14,25,120,4,26,6,3,16,122,34,19,35 }, - { 74,4,36,48,33,91,39,79,22,16,65,5,131,38,24,71,27,52,0,105,51,18,88,104,3,31,10,37,72,19,41,130 }, - { 59,43,38,79,23,27,92,51,0,16,46,5,18,88,41,37,66,3,87,20,48,2,122,4,22,12,1,126,19,65,33,24 }, - { 12,28,1,27,0,16,2,46,65,60,21,3,5,18,6,19,48,14,4,7,79,88,86,29,22,72,93,40,23,8,17,41 }, - { 22,91,39,33,24,71,5,131,36,10,51,0,130,8,104,2,35,125,9,43,52,49,83,80,100,41,122,3,37,38,4,16 }, - { 12,0,1,2,5,3,4,8,7,27,18,38,10,6,16,46,9,20,41,23,126,79,22,14,19,99,88,54,37,48,62,35 }, - { 12,27,1,2,3,0,46,4,38,16,8,28,7,79,18,5,84,6,88,10,14,21,23,20,40,22,60,19,9,29,72,65 }, - { 1,14,7,55,95,29,8,94,30,56,10,108,77,116,152,64,32,48,63,42,143,148,16,25,137,65,11,0,115,9,19,72 }, - { 37,79,66,38,16,52,48,59,43,27,87,33,41,4,23,51,3,5,88,18,92,46,73,122,22,71,20,0,65,19,2,120 }, - { 24,32,83,22,53,1,8,10,7,30,35,5,103,0,100,101,121,113,34,123,63,2,44,25,71,115,80,14,26,108,51,39 }, - { 97,45,111,58,85,139,0,90,47,7,120,106,142,30,50,132,41,62,84,1,119,114,14,56,117,8,38,29,2,64,116,5 }, - { 12,28,16,18,1,60,6,14,2,21,0,86,126,19,48,93,7,27,17,29,5,65,54,38,72,79,84,88,119,145,8,111 }, - { 118,47,64,116,57,85,7,14,50,1,42,0,45,68,86,69,2,111,134,28,90,55,16,29,56,48,84,144,60,30,112,41 }, - { 
12,1,2,0,7,6,28,5,3,4,8,14,60,21,18,40,17,86,10,9,16,29,19,93,126,79,38,84,72,27,111,119 }, - { 11,8,49,130,10,125,9,124,100,114,131,30,58,104,32,39,24,113,36,105,0,41,22,120,5,53,111,38,142,44,83,35 }, - { 50,70,47,118,85,57,106,0,45,7,64,90,81,14,2,134,28,62,86,55,69,1,78,119,68,56,18,67,16,60,29,21 }, - { 43,37,33,87,51,41,66,5,122,38,22,59,92,0,23,91,27,16,71,79,18,52,120,4,3,24,46,20,73,39,62,36 }, - { 79,48,4,16,27,88,43,33,18,38,65,37,46,3,19,51,52,22,66,87,74,5,41,91,23,59,0,71,122,72,20,92 }, - { 32,100,10,8,30,104,24,44,39,113,83,103,1,7,22,53,115,63,135,121,26,35,34,5,0,108,137,90,91,45,2,130 }, - { 0,1,2,5,16,12,6,7,14,3,19,18,29,20,4,21,40,8,17,35,23,48,126,22,25,56,26,10,98,27,38,65 }, - { 143,67,56,146,1,7,133,55,64,141,134,69,6,47,14,29,84,21,111,147,57,16,95,72,118,132,50,0,2,18,119,42 }, - { 1,7,67,14,133,111,8,84,0,21,2,47,64,132,55,10,95,147,119,42,16,5,72,56,4,3,6,29,9,25,18,30 }, - { 68,57,69,112,144,86,102,2,134,55,0,70,118,64,75,47,14,28,93,143,67,7,50,149,1,21,29,56,119,95,60,78 }, - { 58,97,114,30,124,45,11,139,8,90,0,142,7,10,41,113,84,62,49,111,85,1,9,5,137,120,32,14,2,117,47,38 }, - { 23,66,18,79,38,20,43,27,16,88,46,59,126,37,87,12,73,92,3,5,48,0,19,54,2,51,28,1,41,65,122,22 }, - { 0,12,2,27,5,40,46,38,1,41,3,79,88,23,99,4,20,62,22,54,92,18,8,37,16,35,10,7,19,120,144,24 }, - { 1,14,25,26,0,7,44,34,129,42,24,5,135,22,19,148,6,96,83,2,29,16,63,35,101,64,140,136,116,110,3,10 }, - { 12,1,2,27,3,4,38,5,7,8,18,16,46,6,0,40,41,10,79,23,88,9,20,22,14,19,37,92,48,126,28,21 }, - { 7,1,10,32,108,103,94,47,8,53,25,14,34,115,100,129,121,130,148,42,64,116,63,26,44,0,24,30,113,4,104,22 }, - { 47,134,7,14,55,69,64,95,1,29,85,118,56,116,45,57,102,143,50,90,42,30,16,94,0,8,67,75,133,2,18,48 }, - { 12,1,2,0,7,6,28,8,14,5,3,4,40,21,17,18,60,86,16,93,126,10,9,29,99,38,119,25,19,54,27,84 }, - { 59,16,27,18,23,88,79,37,46,66,38,20,73,126,3,43,48,87,92,51,41,12,19,5,52,107,65,0,151,122,54,2 }, - { 
1,21,147,7,119,14,76,132,55,0,86,145,2,6,69,67,16,143,111,138,17,28,29,60,18,93,8,19,40,56,84,5 }, - { 144,86,112,2,68,102,69,0,149,93,75,28,57,55,145,60,21,67,99,134,143,40,146,119,82,110,62,6,29,26,78,14 }, - { 102,57,55,69,143,75,146,67,56,68,134,2,29,141,0,21,6,14,133,118,64,1,7,95,47,84,111,28,147,82,72,119 }, - { 0,70,57,119,50,145,2,86,28,118,69,78,149,47,60,68,67,55,93,81,134,21,14,62,64,7,5,1,132,85,41,16 }, - { 51,5,43,71,122,87,41,37,91,39,0,22,33,36,38,24,66,120,62,2,80,16,92,10,59,4,27,23,35,79,8,3 }, - { 12,1,2,0,7,6,28,5,8,14,3,21,40,4,60,17,86,18,16,93,10,9,126,119,99,29,19,41,38,27,25,92 }, - { 27,18,46,126,23,16,88,79,20,151,59,73,48,38,0,54,12,2,37,1,19,5,28,60,66,41,3,109,86,65,40,6 }, - { 48,79,4,33,16,74,65,38,88,27,91,52,18,36,22,19,46,0,37,3,51,5,71,39,72,43,24,41,92,87,2,10 }, - { 86,2,144,93,28,112,141,6,102,21,99,60,75,0,68,82,69,146,67,149,55,40,145,76,111,147,56,119,110,143,26,132 }, - { 6,138,2,99,86,17,40,93,28,21,145,141,0,60,119,147,128,76,67,54,1,12,5,27,144,14,38,98,146,41,29,19 }, - { 1,8,0,10,2,29,7,5,3,56,4,25,14,152,63,32,65,72,96,42,34,108,48,9,26,16,84,103,67,148,22,129 }, - { 149,145,0,86,2,28,93,144,62,60,119,101,21,41,5,35,78,99,26,40,12,68,57,67,110,120,69,18,55,76,132,70 }, - { 12,28,16,1,48,19,6,60,2,14,18,21,0,27,46,65,86,29,5,7,72,93,40,3,17,84,56,88,126,4,38,8 }, - { 1,8,5,10,7,24,2,62,0,41,22,122,120,9,4,3,32,87,11,37,38,83,100,44,25,104,16,26,39,80,14,6 }, - { 0,119,62,86,145,149,28,132,93,2,120,67,60,41,35,5,144,21,123,38,111,81,84,56,12,44,24,50,92,55,40,22 }, - { 2,93,99,28,40,144,60,0,86,150,76,21,149,98,6,25,1,61,82,26,12,5,54,141,7,18,145,16,27,138,110,38 }, - { 24,8,10,22,32,35,100,5,1,53,0,7,71,80,30,123,83,104,51,11,2,39,44,113,9,62,25,103,34,101,43,41 }, - { 12,1,2,0,7,6,28,5,40,60,8,16,3,18,14,4,86,21,17,93,41,10,9,99,27,119,38,19,126,22,48,145 }, - { 45,47,50,7,85,90,97,1,64,139,116,118,30,58,14,106,70,111,0,57,94,42,137,142,29,120,8,56,18,134,84,41 }, - { 
12,0,2,5,27,38,1,46,41,40,79,144,3,22,88,23,28,60,99,62,6,24,26,7,4,16,10,35,37,18,14,20 }, - { 37,38,59,92,0,5,23,51,79,41,27,22,2,3,87,16,46,4,1,43,20,33,18,88,24,71,8,10,48,19,126,122 }, - { 12,28,16,60,1,18,6,21,19,14,48,0,2,86,93,5,46,29,17,27,65,7,3,72,38,126,119,40,84,37,56,4 }, - { 0,2,5,1,16,6,27,28,18,38,60,7,14,21,46,40,86,41,19,48,93,8,3,79,22,4,10,37,62,23,24,111 }, - { 85,7,90,30,47,139,45,50,94,58,137,1,8,64,14,116,118,115,113,11,124,108,0,10,97,57,32,70,42,106,29,114 }, - { 33,36,22,71,51,5,91,39,0,52,43,24,131,74,16,37,38,122,41,3,87,48,4,104,35,80,10,2,105,62,27,18 }, - { 12,1,27,2,0,16,3,28,46,18,4,6,5,72,21,79,38,7,14,60,88,8,65,19,48,29,23,40,22,20,86,126 }, - { 0,12,2,27,5,38,46,41,1,40,79,3,88,23,22,99,20,37,62,4,18,6,16,35,60,28,24,7,92,8,14,10 }, - { 7,47,1,30,137,8,116,94,90,64,14,115,108,118,57,10,148,113,42,85,32,11,63,50,103,45,124,134,55,9,69,34 }, - { 55,7,1,29,56,143,64,47,67,133,14,146,95,72,84,8,116,111,6,134,141,21,65,0,69,30,16,45,85,42,50,10 }, - { 14,1,42,8,10,29,108,63,55,148,95,32,7,19,25,115,103,34,56,129,77,0,16,152,94,30,113,26,2,5,48,4 }, - { 111,120,142,97,58,0,41,45,62,132,114,84,139,30,5,8,38,2,7,85,119,90,117,1,124,11,56,47,28,27,35,72 }, - { 1,0,14,2,6,5,16,19,7,29,42,18,3,25,12,35,21,8,26,17,40,4,20,48,109,99,22,96,55,101,10,61 }, - { 12,0,1,5,3,2,4,7,27,8,38,6,40,18,16,10,20,46,9,41,23,22,79,14,62,19,37,126,88,11,92,48 }, - { 10,8,104,39,24,32,22,83,44,100,30,130,53,91,113,5,11,1,35,33,7,49,0,2,103,71,36,124,9,80,131,34 }, - { 1,7,0,14,8,34,5,25,35,26,6,63,10,123,2,16,103,19,44,32,135,121,108,80,62,30,115,94,149,144,53,18 }, - { 75,68,146,141,102,67,2,21,6,57,69,143,0,55,82,86,28,144,147,29,93,112,56,119,133,14,76,60,84,134,111,145 }, - { 10,32,115,7,8,53,1,108,30,113,94,137,100,63,90,34,130,103,121,47,44,25,104,39,24,26,85,14,49,36,22,131 }, - { 39,24,10,22,8,130,91,104,83,49,5,33,100,11,0,35,32,131,71,36,9,44,53,2,80,51,30,1,41,7,43,62 }, - { 
38,36,65,105,27,72,31,79,41,131,5,48,125,39,0,16,92,46,22,13,18,84,24,37,88,2,33,74,91,71,130,49 }, - { 0,106,62,50,45,119,85,81,132,28,2,86,41,47,38,60,35,117,5,29,7,30,145,90,55,70,14,111,18,67,93,56 }, - { 0,2,5,1,3,25,19,26,4,34,29,10,22,16,8,7,24,14,48,65,53,18,6,77,44,56,72,61,121,21,136,40 }, - { 7,1,94,8,47,115,10,32,113,103,30,108,137,63,14,64,116,148,129,42,90,25,34,118,53,57,11,49,85,9,96,50 }, - { 14,0,1,26,19,5,42,2,25,24,29,22,6,44,61,16,7,96,136,3,140,34,35,55,135,18,48,77,83,4,8,10 }, - { 1,7,14,0,25,6,34,5,26,16,63,2,19,8,35,101,108,29,94,10,18,42,123,144,129,47,61,21,3,62,149,4 }, - { 12,0,2,1,28,5,6,120,7,60,40,16,18,86,27,14,21,93,8,62,41,38,3,17,4,119,99,48,19,126,10,9 }, - { 86,144,93,2,28,149,0,60,99,112,110,145,40,21,102,26,75,62,69,1,12,101,119,25,76,67,7,68,55,5,6,14 }, - { 8,30,10,32,113,49,115,137,124,103,45,90,7,139,11,1,58,53,130,94,108,100,9,63,85,125,34,47,0,24,44,104 }, - { 120,142,111,41,58,114,97,0,11,62,84,124,5,30,8,38,132,127,27,139,92,10,72,45,49,9,28,2,29,56,16,1 }, - { 8,113,30,137,7,32,10,90,94,115,1,103,108,63,47,85,49,53,11,45,34,50,14,25,9,124,100,130,139,121,42,26 }, - { 64,7,14,47,134,55,1,42,95,69,116,90,94,30,8,29,56,137,45,108,85,10,57,16,102,143,118,19,63,32,11,50 }, - { 62,132,0,119,120,41,111,86,35,28,5,84,56,38,2,93,145,60,67,12,92,27,29,72,55,117,21,24,133,149,22,45 }, - { 57,68,69,118,134,64,50,47,55,14,7,2,102,144,0,112,70,86,85,1,95,29,116,143,42,75,16,56,28,45,21,48 }, - { 0,12,2,1,5,28,6,40,60,27,7,38,16,14,86,18,93,41,62,46,99,35,8,23,3,17,22,21,10,19,79,20 }, - { 12,1,2,27,16,3,38,111,4,0,18,5,7,46,40,8,79,6,14,28,88,10,48,41,19,84,21,9,22,23,20,72 }, - { 53,103,32,7,1,100,22,63,71,44,10,115,108,24,92,104,26,30,122,94,8,39,83,34,137,135,90,91,121,5,87,47 }, - { 87,37,41,0,22,38,2,92,1,24,4,8,3,59,10,5,39,23,71,79,122,27,16,46,33,7,91,20,18,51,9,120 }, - { 1,7,8,10,0,5,35,32,53,44,14,30,2,80,25,34,6,62,26,103,16,19,63,9,149,24,121,41,22,11,113,83 }, - { 
11,58,8,30,124,49,10,113,9,114,139,45,97,32,7,137,90,1,0,130,115,125,100,24,5,94,53,41,14,13,35,38 }, - { 125,105,9,36,131,49,8,130,39,11,10,5,22,38,41,104,0,31,13,24,27,16,2,72,65,91,48,32,84,18,100,74 }, - { 12,1,0,2,6,3,7,5,4,8,14,28,16,60,18,10,21,17,19,9,40,27,86,93,29,38,54,11,25,48,46,41 }, - { 84,41,38,72,92,29,111,5,65,120,79,0,27,56,48,14,132,16,119,22,86,88,46,28,62,12,1,2,93,18,24,127 }, - { 99,28,40,60,2,93,138,0,98,17,86,54,76,12,27,1,21,144,128,38,5,14,46,18,25,16,109,6,41,145,7,29 }, - { 1,63,10,32,148,14,103,34,42,7,8,108,116,53,64,96,25,121,26,94,140,0,29,19,55,24,100,136,5,4,44,115 }, - { 131,100,130,49,10,8,36,104,39,0,48,41,11,38,4,24,27,22,16,44,79,5,33,2,53,9,125,74,91,120,32,83 }, - { 36,39,131,74,4,91,22,33,125,104,130,48,10,24,16,5,49,8,100,105,79,0,9,65,71,2,18,83,31,11,19,44 }, - { 0,12,2,1,6,5,7,28,40,60,16,14,18,62,86,27,93,8,17,38,21,41,35,99,3,19,10,23,22,4,9,48 }, - { 1,7,67,14,21,147,111,55,132,119,0,8,2,76,64,16,47,84,6,18,86,95,145,10,42,29,133,5,56,134,17,72 }, - { 69,55,47,134,102,143,7,57,118,95,14,64,29,56,1,50,75,67,146,2,0,133,68,16,21,6,141,85,116,18,72,65 }, - { 1,44,7,24,83,63,34,103,22,121,53,32,25,35,0,115,108,5,14,8,10,101,94,30,2,123,110,26,137,47,90,19 }, - { 14,1,25,42,34,0,26,96,19,29,140,5,53,10,2,121,3,24,44,22,55,77,129,7,63,16,8,4,6,61,100,48 }, - { 30,90,7,8,137,94,85,1,47,113,115,108,45,139,124,11,10,32,50,58,103,14,63,64,9,116,49,42,25,148,0,53 }, - { 40,99,2,60,28,17,0,54,93,98,86,138,6,12,21,76,1,5,27,144,128,38,19,46,14,41,145,7,16,67,3,109 }, - { 45,58,30,139,90,7,85,137,97,8,124,47,1,11,106,114,50,94,0,113,10,115,14,32,9,64,108,41,49,29,62,116 }, - { 14,42,10,1,63,96,32,25,34,8,129,29,0,103,55,19,26,53,77,5,95,2,4,7,3,16,148,56,18,24,121,108 }, - { 21,2,75,86,6,76,144,28,119,99,93,147,141,67,102,145,60,132,146,128,0,82,40,138,55,111,143,17,133,112,69,14 }, - { 111,120,41,62,84,132,0,5,38,119,56,92,72,142,27,28,29,35,58,80,2,86,65,79,12,14,1,24,145,16,21,48 }, - { 
146,67,141,69,133,21,6,143,57,55,111,147,56,1,14,132,7,2,134,102,0,119,29,84,76,64,86,72,28,68,47,75 }, - { 12,1,0,5,27,3,7,4,38,8,6,41,16,40,46,10,18,79,2,9,23,86,20,22,62,14,37,88,92,19,24,11 }, - { 0,12,2,1,27,5,38,28,60,6,40,7,16,46,18,14,41,99,93,62,3,79,86,23,149,8,22,35,88,17,19,10 }, - { 141,6,21,67,147,102,146,2,76,119,132,69,55,111,86,75,28,133,143,0,1,145,14,128,56,99,17,60,29,93,84,68 }, - { 21,76,1,119,86,145,2,0,14,7,6,138,146,55,17,28,132,93,67,40,60,143,29,147,111,16,69,141,5,56,19,133 }, - { 1,8,108,14,7,116,64,42,10,63,94,32,115,103,113,96,30,34,55,47,95,148,29,140,129,25,134,53,69,26,19,11 }, - { 12,1,3,5,4,2,0,7,8,38,27,16,18,6,10,20,41,40,79,46,9,23,22,88,92,37,14,24,62,19,48,99 }, - { 1,14,7,0,6,25,5,16,19,2,42,26,29,35,61,8,18,129,101,21,3,110,34,148,96,10,17,4,22,40,12,20 }, - { 0,2,5,1,3,19,22,26,16,24,29,7,14,6,4,25,18,44,8,48,12,61,20,21,10,35,65,56,23,40,17,107 }, - { 1,7,8,29,56,0,10,14,2,42,72,5,4,65,3,30,84,94,67,9,25,133,111,11,32,108,16,63,21,96,26,48 } - }; - - static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - - static inline uint8_t to_5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } - static inline uint8_t to_6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } - - template inline S maximum(S a, S b) { return (a > b) ? a : b; } - template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } - template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - - template inline S minimum(S a, S b) { return (a < b) ? 
a : b; } - template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } - template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } - - template inline T square(T a) { return a * a; } - - static inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high; return value; } - static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - - template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } - static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) value = high; return value; } - - static inline int squarei(int a) { return a * a; } - static inline int absi(int a) { return (a < 0) ? -a : a; } - - template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } - - enum class eNoClamp { cNoClamp }; - - struct color32 - { - union - { - struct - { - uint8_t r; - uint8_t g; - uint8_t b; - uint8_t a; - }; - - uint8_t c[4]; - - uint32_t m; - }; - - color32() { } - - color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { (void)unused; set_noclamp_rgba(vr, vg, vb, va); } - - void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); c[3] = static_cast(va); } - - void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { c[0] = static_cast(vr); c[1] = static_cast(vg); c[2] = static_cast(vb); } - void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - - void set_clamped(int vr, int vg, int vb, int va) { c[0] = clamp255(vr); c[1] = clamp255(vg); c[2] = clamp255(vb); c[3] = clamp255(va); } - - uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return 
c[idx]; } - uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } - - bool operator== (const color32&rhs) const { return m == rhs.m; } - - void set_rgb(const color32& other) { c[0] = static_cast(other.c[0]); c[1] = static_cast(other.c[1]); c[2] = static_cast(other.c[2]); } - - static color32 comp_min(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); } - static color32 comp_max(const color32& a, const color32& b) { return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } - }; - + // Rate Distortion Optimization (RDO) enum dxt_constants { cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, @@ -1491,7 +294,7 @@ namespace rgbcx uint8_t m_low_color[cTotalEndpointBytes]; uint8_t m_high_color[cTotalEndpointBytes]; uint8_t m_selectors[cTotalSelectorBytes]; - + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline bool is_3color() const { return get_low_color() <= get_high_color(); } @@ -1500,6 +303,25 @@ namespace rgbcx inline uint32_t get_selector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; } inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } + inline uint32_t get_endpoint_bits() const { return m_low_color[0] | (m_low_color[1] << 8) | (m_high_color[0] << 16) | (m_high_color[1] << 24); } + inline void set_endpoint_bits(uint32_t s) { m_low_color[0] = (uint8_t)s; m_low_color[1] = (uint8_t)(s >> 8); m_high_color[0] = 
(uint8_t)(s >> 16); m_high_color[1] = (uint8_t)(s >> 24); } + + inline uint32_t get_selector_bits() const { return m_selectors[0] | (m_selectors[1] << 8) | (m_selectors[2] << 16) | (m_selectors[3] << 24); } + inline void set_selector_bits(uint32_t s) { m_selectors[0] = (uint8_t)s; m_selectors[1] = (uint8_t)(s >> 8); m_selectors[2] = (uint8_t)(s >> 16); m_selectors[3] = (uint8_t)(s >> 24); } + + inline bool any_selectors_transparent() const + { + uint32_t sel_bits = get_selector_bits(); + for (uint32_t i = 0; i < 16; i++) + { + if ((sel_bits & 3) == 3) + return true; + + sel_bits >>= 2; + } + return false; + } + static inline uint16_t pack_color(const color32& color, bool scaled, uint32_t bias = 127U) { uint32_t r = color.r, g = color.g, b = color.b; @@ -1533,2650 +355,78 @@ namespace rgbcx } }; - static const uint32_t TOTAL_ORDER_4_0_16 = 15; - static const uint32_t TOTAL_ORDER_4_1_16 = 700; - static const uint32_t TOTAL_ORDER_4_2_16 = 753; - static const uint32_t TOTAL_ORDER_4_3_16 = 515; - static uint16_t g_total_ordering4_hash[4096]; - static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3]; - - static const uint32_t TOTAL_ORDER_3_0_16 = 12; - static const uint32_t TOTAL_ORDER_3_1_16 = 15; - static const uint32_t TOTAL_ORDER_3_2_16 = 89; - static uint16_t g_total_ordering3_hash[256]; - static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3]; - - struct hist4 - { - uint8_t m_hist[4]; - - hist4() - { - memset(m_hist, 0, sizeof(m_hist)); - } - - hist4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) - { - m_hist[0] = (uint8_t)i; - m_hist[1] = (uint8_t)j; - m_hist[2] = (uint8_t)k; - m_hist[3] = (uint8_t)l; - } - - inline bool operator== (const hist4 &h) const - { - if (m_hist[0] != h.m_hist[0]) return false; - if (m_hist[1] != h.m_hist[1]) return false; - if (m_hist[2] != h.m_hist[2]) return false; - if (m_hist[3] != h.m_hist[3]) return false; - return true; - } - - inline bool any_16() const - { - return (m_hist[0] == 16) || (m_hist[1] == 16) || 
(m_hist[2] == 16) || (m_hist[3] == 16); - } - - inline uint32_t lookup_total_ordering_index() const - { - if (m_hist[0] == 16) - return TOTAL_ORDER_4_0_16; - else if (m_hist[1] == 16) - return TOTAL_ORDER_4_1_16; - else if (m_hist[2] == 16) - return TOTAL_ORDER_4_2_16; - else if (m_hist[3] == 16) - return TOTAL_ORDER_4_3_16; - - // Must sum to 16, so m_hist[3] isn't needed. - return g_total_ordering4_hash[m_hist[0] | (m_hist[1] << 4) | (m_hist[2] << 8)]; - } - }; - - struct hist3 - { - uint8_t m_hist[3]; - - hist3() - { - memset(m_hist, 0, sizeof(m_hist)); - } - - hist3(uint32_t i, uint32_t j, uint32_t k) - { - m_hist[0] = (uint8_t)i; - m_hist[1] = (uint8_t)j; - m_hist[2] = (uint8_t)k; - } - - inline bool operator== (const hist3 &h) const - { - if (m_hist[0] != h.m_hist[0]) return false; - if (m_hist[1] != h.m_hist[1]) return false; - if (m_hist[2] != h.m_hist[2]) return false; - return true; - } - - inline bool any_16() const - { - return (m_hist[0] == 16) || (m_hist[1] == 16) || (m_hist[2] == 16); - } - - inline uint32_t lookup_total_ordering_index() const - { - if (m_hist[0] == 16) - return TOTAL_ORDER_3_0_16; - else if (m_hist[1] == 16) - return TOTAL_ORDER_3_1_16; - else if (m_hist[2] == 16) - return TOTAL_ORDER_3_2_16; - - // Must sum to 16, so m_hist[2] isn't needed. 
- return g_total_ordering3_hash[m_hist[0] | (m_hist[1] << 4)]; - } - }; - - struct bc1_match_entry + struct bc4_block { - uint8_t m_hi; - uint8_t m_lo; - uint8_t m_e; - }; - - static bc1_approx_mode g_bc1_approx_mode; - static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; - static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256]; - - static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } - static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } - - // v0, v1 = unexpanded DXT1 endpoint values (5/6-bits) - // c0, c1 = expanded DXT1 endpoint values (8-bits) - static inline int interp_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1) / 3; } - static inline int interp_5_6_ideal_round(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 2 + c1 + 1) / 3; } - static inline int interp_half_5_6_ideal(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1) / 2; } - - static inline int interp_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((2 * v0 + v1) * 22) / 8; } - static inline int interp_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + (gdiff / 4) + 128 + gdiff * 80) / 256; } + enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; + uint8_t m_endpoints[2]; - static inline int interp_half_5_nv(int v0, int v1) { assert(v0 < 32 && v1 < 32); return ((v0 + v1) * 33) / 8; } - static inline int interp_half_6_nv(int c0, int c1) { assert(c0 < 256 && c1 < 256); const int gdiff = c1 - c0; return (256 * c0 + gdiff/4 + 128 + gdiff * 128) / 256; } + uint8_t m_selectors[cTotalSelectorBytes]; - static inline int interp_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 * 43 + c1 * 21 + 32) >> 6; } - static inline int interp_half_5_6_amd(int c0, int c1) { assert(c0 < 256 && c1 < 256); return (c0 + c1 + 1) >> 1; } + inline uint32_t get_low_alpha() const { return 
m_endpoints[0]; } + inline uint32_t get_high_alpha() const { return m_endpoints[1]; } + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); - switch (mode) + inline uint64_t get_selector_bits() const { - case bc1_approx_mode::cBC1NVidia: return interp_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | + (((uint64_t)m_selectors[4]) << 32U) | + (((uint64_t)m_selectors[5]) << 40U); } - } - static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - (void)v0; (void)v1; - assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); - switch (mode) + inline void set_selector_bits(uint64_t v) { - case bc1_approx_mode::cBC1NVidia: return interp_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: return interp_5_6_amd(c0, c1); - default: - case bc1_approx_mode::cBC1Ideal: return interp_5_6_ideal(c0, c1); - case bc1_approx_mode::cBC1IdealRound4: return interp_5_6_ideal_round(c0, c1); + for (uint32_t i = 0; i < 6; i++) + { + m_selectors[i] = (uint8_t)v; + v >>= 8; + } } - } - static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); - switch (mode) + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const { - case bc1_approx_mode::cBC1NVidia: return interp_half_5_nv(v0, v1); - case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: 
- return interp_half_5_6_ideal(c0, c1); + assert((x < 4U) && (y < 4U)); + return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); } - } - static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) - { - (void)v0; (void)v1; - assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); - switch (mode) + static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h) { - case bc1_approx_mode::cBC1NVidia: return interp_half_6_nv(c0, c1); - case bc1_approx_mode::cBC1AMD: return interp_half_5_6_amd(c0, c1); - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - default: - return interp_half_5_6_ideal(c0, c1); + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 4 + h) / 5); + pDst[3] = static_cast((l * 3 + h * 2) / 5); + pDst[4] = static_cast((l * 2 + h * 3) / 5); + pDst[5] = static_cast((l + h * 4) / 5); + pDst[6] = 0; + pDst[7] = 255; + return 6; } - } - static void prepare_bc1_single_color_table_half(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) - { - for (int i = 0; i < 256; i++) + static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h) { - int lowest_e = 256; - for (int lo = 0; lo < size; lo++) - { - const int lo_e = pExpand[lo]; - - for (int hi = 0; hi < size; hi++) - { - const int hi_e = pExpand[hi]; - - const int v = (size == 32) ? interp_half_5(hi, lo, hi_e, lo_e, mode) : interp_half_6(hi, lo, hi_e, lo_e, mode); - - int e = iabs(v - i); - - // We only need to factor in 3% error in BC1 ideal mode. - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; - - // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. 
- if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) - { - pTable[i].m_hi = static_cast(hi); - pTable[i].m_lo = static_cast(lo); - - assert(e <= UINT8_MAX); - pTable[i].m_e = static_cast(e); - - lowest_e = e; - } - - } // hi - } // lo + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 6 + h) / 7); + pDst[3] = static_cast((l * 5 + h * 2) / 7); + pDst[4] = static_cast((l * 4 + h * 3) / 7); + pDst[5] = static_cast((l * 3 + h * 4) / 7); + pDst[6] = static_cast((l * 2 + h * 5) / 7); + pDst[7] = static_cast((l + h * 6) / 7); + return 8; } - } - static void prepare_bc1_single_color_table(bc1_match_entry* pTable, const uint8_t* pExpand, int size, bc1_approx_mode mode) - { - for (int i = 0; i < 256; i++) + static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h) { - int lowest_e = 256; - for (int lo = 0; lo < size; lo++) - { - const int lo_e = pExpand[lo]; - - for (int hi = 0; hi < size; hi++) - { - const int hi_e = pExpand[hi]; - - const int v = (size == 32) ? interp_5(hi, lo, hi_e, lo_e, mode) : interp_6(hi, lo, hi_e, lo_e, mode); - - int e = iabs(v - i); - - if ((mode == bc1_approx_mode::cBC1Ideal) || (mode == bc1_approx_mode::cBC1IdealRound4)) - e += (iabs(hi_e - lo_e) * 3) / 100; - - // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. - if ((e < lowest_e) || ((e == lowest_e) && (lo == hi))) - { - pTable[i].m_hi = static_cast(hi); - pTable[i].m_lo = static_cast(lo); - - assert(e <= UINT8_MAX); - pTable[i].m_e = static_cast(e); - - lowest_e = e; - } - - } // hi - } // lo + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); } - } - - // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) - // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. 
- static const uint32_t g_weight_vals4[4] = { 0x000009, 0x010204, 0x040201, 0x090000 }; - - // multiplier is 4 for 3-color - static const uint32_t g_weight_vals3[3] = { 0x000004, 0x040000, 0x010101 }; - - static inline void compute_selector_factors4(const hist4 &h, float &iz00, float &iz10, float &iz11) - { - uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 4; sel++) - weight_accum += g_weight_vals4[sel] * h.m_hist[sel]; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - det = 0.0f; - else - det = (3.0f / 255.0f) / det; - - iz00 = z11 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - } - - static inline void compute_selector_factors3(const hist3 &h, float &iz00, float &iz10, float &iz11) - { - uint32_t weight_accum = 0; - for (uint32_t sel = 0; sel < 3; sel++) - weight_accum += g_weight_vals3[sel] * h.m_hist[sel]; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; + }; - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - det = 0.0f; - else - det = (2.0f / 255.0f) / det; - - iz00 = z11 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - } - - static bool g_initialized; - - void init(bc1_approx_mode mode) - { - g_bc1_approx_mode = mode; - - uint8_t bc1_expand5[32]; - for (int i = 0; i < 32; i++) - bc1_expand5[i] = static_cast((i << 3) | (i >> 2)); - prepare_bc1_single_color_table(g_bc1_match5_equals_1, bc1_expand5, 32, mode); - prepare_bc1_single_color_table_half(g_bc1_match5_half, bc1_expand5, 32, mode); - - uint8_t bc1_expand6[64]; - for (int i = 0; i < 64; i++) - bc1_expand6[i] = static_cast((i << 2) | (i >> 4)); - prepare_bc1_single_color_table(g_bc1_match6_equals_1, bc1_expand6, 64, mode); - prepare_bc1_single_color_table_half(g_bc1_match6_half, 
bc1_expand6, 64, mode); - - for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS4; i++) - { - hist4 h; - h.m_hist[0] = (uint8_t)g_unique_total_orders4[i][0]; - h.m_hist[1] = (uint8_t)g_unique_total_orders4[i][1]; - h.m_hist[2] = (uint8_t)g_unique_total_orders4[i][2]; - h.m_hist[3] = (uint8_t)g_unique_total_orders4[i][3]; - - if (!h.any_16()) - { - const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4) | (h.m_hist[2] << 8); - assert(index < 4096); - g_total_ordering4_hash[index] = (uint16_t)i; - } - - compute_selector_factors4(h, g_selector_factors4[i][0], g_selector_factors4[i][1], g_selector_factors4[i][2]); - } - - for (uint32_t i = 0; i < NUM_UNIQUE_TOTAL_ORDERINGS3; i++) - { - hist3 h; - h.m_hist[0] = (uint8_t)g_unique_total_orders3[i][0]; - h.m_hist[1] = (uint8_t)g_unique_total_orders3[i][1]; - h.m_hist[2] = (uint8_t)g_unique_total_orders3[i][2]; - - if (!h.any_16()) - { - const uint32_t index = h.m_hist[0] | (h.m_hist[1] << 4); - assert(index < 256); - g_total_ordering3_hash[index] = (uint16_t)i; - } - - compute_selector_factors3(h, g_selector_factors3[i][0], g_selector_factors3[i][1], g_selector_factors3[i][2]); - } - - g_initialized = true; - } - - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb, bool allow_3color) - { - bc1_block* pDst_block = static_cast(pDst); - - uint32_t mask = 0xAA; - int max16 = -1, min16 = 0; - - if (allow_3color) - { - const uint32_t err4 = g_bc1_match5_equals_1[fr].m_e + g_bc1_match6_equals_1[fg].m_e + g_bc1_match5_equals_1[fb].m_e; - const uint32_t err3 = g_bc1_match5_half[fr].m_e + g_bc1_match6_half[fg].m_e + g_bc1_match5_half[fb].m_e; - - if (err3 < err4) - { - max16 = (g_bc1_match5_half[fr].m_hi << 11) | (g_bc1_match6_half[fg].m_hi << 5) | g_bc1_match5_half[fb].m_hi; - min16 = (g_bc1_match5_half[fr].m_lo << 11) | (g_bc1_match6_half[fg].m_lo << 5) | g_bc1_match5_half[fb].m_lo; - - if (max16 > min16) - std::swap(max16, min16); - } - } - - if (max16 == -1) - { - max16 = 
(g_bc1_match5_equals_1[fr].m_hi << 11) | (g_bc1_match6_equals_1[fg].m_hi << 5) | g_bc1_match5_equals_1[fb].m_hi; - min16 = (g_bc1_match5_equals_1[fr].m_lo << 11) | (g_bc1_match6_equals_1[fg].m_lo << 5) | g_bc1_match5_equals_1[fb].m_lo; - - if (min16 == max16) - { - // Always forbid 3 color blocks - // This is to guarantee that BC3 blocks never use punchthrough alpha (3 color) mode, which isn't supported on some (all?) GPU's. - mask = 0; - - // Make l > h - if (min16 > 0) - min16--; - else - { - // l = h = 0 - assert(min16 == max16 && max16 == 0); - - max16 = 1; - min16 = 0; - mask = 0x55; - } - - assert(max16 > min16); - } - - if (max16 < min16) - { - std::swap(max16, min16); - mask ^= 0x55; - } - } - - pDst_block->set_low_color(static_cast(max16)); - pDst_block->set_high_color(static_cast(min16)); - pDst_block->m_selectors[0] = static_cast(mask); - pDst_block->m_selectors[1] = static_cast(mask); - pDst_block->m_selectors[2] = static_cast(mask); - pDst_block->m_selectors[3] = static_cast(mask); - } - - static const float g_midpoint5[32] = { .015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f, .370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f, .725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f }; - static const float g_midpoint6[64] = { .007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f, .180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f, .356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f, .533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f, .705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f, .882353f, .898039f, .913725f, 
.929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f }; - - struct vec3F { float c[3]; }; - - static inline void compute_least_squares_endpoints4_rgb( - vec3F* pXl, vec3F* pXh, - int total_r, int total_g, int total_b, - float iz00, float iz10, float iz11, - uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) - { - const float iz01 = iz10; - - const uint32_t f1 = g_unique_total_orders4[s][0]; - const uint32_t f2 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1]; - const uint32_t f3 = g_unique_total_orders4[s][0] + g_unique_total_orders4[s][1] + g_unique_total_orders4[s][2]; - uint32_t uq00_r = (r_sum[f2] - r_sum[f1]) + (r_sum[f3] - r_sum[f2]) * 2 + (r_sum[16] - r_sum[f3]) * 3; - uint32_t uq00_g = (g_sum[f2] - g_sum[f1]) + (g_sum[f3] - g_sum[f2]) * 2 + (g_sum[16] - g_sum[f3]) * 3; - uint32_t uq00_b = (b_sum[f2] - b_sum[f1]) + (b_sum[f3] - b_sum[f2]) * 2 + (b_sum[16] - b_sum[f3]) * 3; - - float q10_r = (float)(total_r * 3 - uq00_r); - float q10_g = (float)(total_g * 3 - uq00_g); - float q10_b = (float)(total_b * 3 - uq00_b); - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - } - - static inline bool compute_least_squares_endpoints4_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh, int total_r, int total_g, int total_b) - { - uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; - uint32_t weight_accum = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; - const uint8_t sel = pSelectors[i]; - - weight_accum += g_weight_vals4[sel]; - uq00_r += sel * r; - uq00_g += sel * g; - uq00_b += sel * b; - } - - int q10_r = total_r * 3 - uq00_r; - int q10_g = total_g * 3 - uq00_g; 
- int q10_b = total_b * 3 - uq00_b; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; - - det = (3.0f / 255.0f) / det; - - float iz00, iz01, iz10, iz11; - iz00 = z11 * det; - iz01 = -z01 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - - return true; - } - - static inline void compute_least_squares_endpoints3_rgb( - vec3F* pXl, vec3F* pXh, - int total_r, int total_g, int total_b, - float iz00, float iz10, float iz11, - uint32_t s, const uint32_t r_sum[17], const uint32_t g_sum[17], const uint32_t b_sum[17]) - { - const float iz01 = iz10; - - // Compensates for BC1 3-color ordering, which is selector 0, 2, 1 - const uint32_t f1 = g_unique_total_orders3[s][0]; - const uint32_t f2 = g_unique_total_orders3[s][0] + g_unique_total_orders3[s][2]; - uint32_t uq00_r = (r_sum[16] - r_sum[f2]) * 2 + (r_sum[f2] - r_sum[f1]); - uint32_t uq00_g = (g_sum[16] - g_sum[f2]) * 2 + (g_sum[f2] - g_sum[f1]); - uint32_t uq00_b = (b_sum[16] - b_sum[f2]) * 2 + (b_sum[f2] - b_sum[f1]); - - float q10_r = (float)(total_r * 2 - uq00_r); - float q10_g = (float)(total_g * 2 - uq00_g); - float q10_b = (float)(total_b * 2 - uq00_b); - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - } - - static inline bool 
compute_least_squares_endpoints3_rgb(bool use_black, const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) - { - int uq00_r = 0, uq00_g = 0, uq00_b = 0; - uint32_t weight_accum = 0; - int total_r = 0, total_g = 0, total_b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; - if (use_black) - { - if ((r | g | b) < 4) - continue; - } - - const uint8_t sel = pSelectors[i]; - assert(sel <= 3); - if (sel == 3) - continue; - - weight_accum += g_weight_vals3[sel]; - - static const uint8_t s_tran[3] = { 0, 2, 1 }; - const uint8_t tsel = s_tran[sel]; - uq00_r += tsel * r; - uq00_g += tsel * g; - uq00_b += tsel * b; - - total_r += r; - total_g += g; - total_b += b; - } - - int q10_r = total_r * 2 - uq00_r; - int q10_g = total_g * 2 - uq00_g; - int q10_b = total_b * 2 - uq00_b; - - float z00 = (float)((weight_accum >> 16) & 0xFF); - float z10 = (float)((weight_accum >> 8) & 0xFF); - float z11 = (float)(weight_accum & 0xFF); - float z01 = z10; - - float det = z00 * z11 - z01 * z10; - if (fabs(det) < 1e-8f) - return false; - - det = (2.0f / 255.0f) / det; - - float iz00, iz01, iz10, iz11; - iz00 = z11 * det; - iz01 = -z01 * det; - iz10 = -z10 * det; - iz11 = z00 * det; - - pXl->c[0] = iz00 * (float)uq00_r + iz01 * q10_r; - pXh->c[0] = iz10 * (float)uq00_r + iz11 * q10_r; - - pXl->c[1] = iz00 * (float)uq00_g + iz01 * q10_g; - pXh->c[1] = iz10 * (float)uq00_g + iz11 * q10_g; - - pXl->c[2] = iz00 * (float)uq00_b + iz01 * q10_b; - pXh->c[2] = iz10 * (float)uq00_b + iz11 * q10_b; - - return true; - } - - static inline void bc1_get_block_colors4(uint32_t block_r[4], uint32_t block_g[4], uint32_t block_b[4], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) - { - block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); - block_r[3] = (hr << 3) | (hr >> 2); block_g[3] = (hg << 2) | (hg >> 4); block_b[3] = (hb << 3) | 
(hb >> 2); - - if (g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) - { - block_r[1] = (block_r[0] * 2 + block_r[3]) / 3; block_g[1] = (block_g[0] * 2 + block_g[3]) / 3; block_b[1] = (block_b[0] * 2 + block_b[3]) / 3; - block_r[2] = (block_r[3] * 2 + block_r[0]) / 3; block_g[2] = (block_g[3] * 2 + block_g[0]) / 3; block_b[2] = (block_b[3] * 2 + block_b[0]) / 3; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4) - { - block_r[1] = (block_r[0] * 2 + block_r[3] + 1) / 3; block_g[1] = (block_g[0] * 2 + block_g[3] + 1) / 3; block_b[1] = (block_b[0] * 2 + block_b[3] + 1) / 3; - block_r[2] = (block_r[3] * 2 + block_r[0] + 1) / 3; block_g[2] = (block_g[3] * 2 + block_g[0] + 1) / 3; block_b[2] = (block_b[3] * 2 + block_b[0] + 1) / 3; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) - { - block_r[1] = interp_5_6_amd(block_r[0], block_r[3]); block_g[1] = interp_5_6_amd(block_g[0], block_g[3]); block_b[1] = interp_5_6_amd(block_b[0], block_b[3]); - block_r[2] = interp_5_6_amd(block_r[3], block_r[0]); block_g[2] = interp_5_6_amd(block_g[3], block_g[0]); block_b[2] = interp_5_6_amd(block_b[3], block_b[0]); - } - else - { - block_r[1] = interp_5_nv(lr, hr); block_g[1] = interp_6_nv(block_g[0], block_g[3]); block_b[1] = interp_5_nv(lb, hb); - block_r[2] = interp_5_nv(hr, lr); block_g[2] = interp_6_nv(block_g[3], block_g[0]); block_b[2] = interp_5_nv(hb, lb); - } - } - - static inline void bc1_get_block_colors3(uint32_t block_r[3], uint32_t block_g[3], uint32_t block_b[3], uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb) - { - block_r[0] = (lr << 3) | (lr >> 2); block_g[0] = (lg << 2) | (lg >> 4); block_b[0] = (lb << 3) | (lb >> 2); - block_r[1] = (hr << 3) | (hr >> 2); block_g[1] = (hg << 2) | (hg >> 4); block_b[1] = (hb << 3) | (hb >> 2); - - if ((g_bc1_approx_mode == bc1_approx_mode::cBC1Ideal) || (g_bc1_approx_mode == bc1_approx_mode::cBC1IdealRound4)) - { - block_r[2] = (block_r[0] + block_r[1]) / 2; block_g[2] = 
(block_g[0] + block_g[1]) / 2; block_b[2] = (block_b[0] + block_b[1]) / 2; - } - else if (g_bc1_approx_mode == bc1_approx_mode::cBC1AMD) - { - block_r[2] = interp_half_5_6_amd(block_r[0], block_r[1]); block_g[2] = interp_half_5_6_amd(block_g[0], block_g[1]); block_b[2] = interp_half_5_6_amd(block_b[0], block_b[1]); - } - else - { - block_r[2] = interp_half_5_nv(lr, hr); block_g[2] = interp_half_6_nv(block_g[0], block_g[1]); block_b[2] = interp_half_5_nv(lb, hb); - } - } - - static inline void bc1_find_sels4_noerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16]) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; - - int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - - int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; - - ar *= 2; ag *= 2; ab *= 2; - - static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - - for (uint32_t i = 0; i < 16; i += 4) - { - const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; - const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; - const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; - const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; - - sels[i+0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; - sels[i+1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; - sels[i+2] = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; - sels[i+3] = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; - } - } - - static inline uint32_t bc1_find_sels4_fasterr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t 
sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int ar = block_r[3] - block_r[0], ag = block_g[3] - block_g[0], ab = block_b[3] - block_b[0]; - - int dots[4]; - for (uint32_t i = 0; i < 4; i++) - dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - - int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; - - ar *= 2; ag *= 2; ab *= 2; - - static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i += 4) - { - const int d0 = pSrc_pixels[i+0].r * ar + pSrc_pixels[i+0].g * ag + pSrc_pixels[i+0].b * ab; - const int d1 = pSrc_pixels[i+1].r * ar + pSrc_pixels[i+1].g * ag + pSrc_pixels[i+1].b * ab; - const int d2 = pSrc_pixels[i+2].r * ar + pSrc_pixels[i+2].g * ag + pSrc_pixels[i+2].b * ab; - const int d3 = pSrc_pixels[i+3].r * ar + pSrc_pixels[i+3].g * ag + pSrc_pixels[i+3].b * ab; - - uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; - uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; - uint8_t sel2 = s_sels[(d2 <= t0) + (d2 < t1) + (d2 < t2)]; - uint8_t sel3 = s_sels[(d3 <= t0) + (d3 < t1) + (d3 < t2)]; - - sels[i+0] = sel0; - sels[i+1] = sel1; - sels[i+2] = sel2; - sels[i+3] = sel3; - - total_err += squarei(pSrc_pixels[i+0].r - block_r[sel0]) + squarei(pSrc_pixels[i+0].g - block_g[sel0]) + squarei(pSrc_pixels[i+0].b - block_b[sel0]); - total_err += squarei(pSrc_pixels[i+1].r - block_r[sel1]) + squarei(pSrc_pixels[i+1].g - block_g[sel1]) + squarei(pSrc_pixels[i+1].b - block_b[sel1]); - total_err += squarei(pSrc_pixels[i+2].r - block_r[sel2]) + squarei(pSrc_pixels[i+2].g - block_g[sel2]) + squarei(pSrc_pixels[i+2].b - block_b[sel2]); - total_err += squarei(pSrc_pixels[i+3].r - block_r[sel3]) + squarei(pSrc_pixels[i+3].g - block_g[sel3]) + squarei(pSrc_pixels[i+3].b - block_b[sel3]); - - if (total_err >= cur_err) - break; - } - - return 
total_err; - } - - static inline uint32_t bc1_find_sels4_check2_err(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - int dr = block_r[3] - block_r[0], dg = block_g[3] - block_g[0], db = block_b[3] - block_b[0]; - - const float f = 4.0f / (float)(squarei(dr) + squarei(dg) + squarei(db) + .00000125f); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f); - sel = clampi(sel, 1, 3); - - uint32_t err0 = squarei((int)block_r[sel - 1] - (int)r) + squarei((int)block_g[sel - 1] - (int)g) + squarei((int)block_b[sel - 1] - (int)b); - uint32_t err1 = squarei((int)block_r[sel] - (int)r) + squarei((int)block_g[sel] - (int)g) + squarei((int)block_b[sel] - (int)b); - - int best_sel = sel; - uint32_t best_err = err1; - if (err0 == err1) - { - // Prefer non-interpolation - if ((best_sel - 1) == 0) - best_sel = 0; - } - else if (err0 < best_err) - { - best_sel = sel - 1; - best_err = err0; - } - - total_err += best_err; - - if (total_err >= cur_err) - break; - - sels[i] = (uint8_t)best_sel; - } - return total_err; - } - - static inline uint32_t bc1_find_sels4_fullerr(const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[4], block_g[4], block_b[4]; - bc1_get_block_colors4(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - uint32_t best_err = 
squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); - uint8_t best_sel = 0; - - for (uint32_t j = 1; (j < 4) && best_err; j++) - { - uint32_t err = squarei((int)block_r[j] - (int)r) + squarei((int)block_g[j] - (int)g) + squarei((int)block_b[j] - (int)b); - if ( (err < best_err) || ((err == best_err) && (j == 3)) ) - { - best_err = err; - best_sel = (uint8_t)j; - } - } - - total_err += best_err; - - if (total_err >= cur_err) - break; - - sels[i] = (uint8_t)best_sel; - } - return total_err; - } - - static inline uint32_t bc1_find_sels4(uint32_t flags, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t err; - - if (flags & cEncodeBC1UseFasterMSEEval) - err = bc1_find_sels4_fasterr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - else if (flags & cEncodeBC1UseFullMSEEval) - err = bc1_find_sels4_fullerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - else - err = bc1_find_sels4_check2_err(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels, cur_err); - - return err; - } - - static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const color32* pSrc_pixels, uint32_t lr, uint32_t lg, uint32_t lb, uint32_t hr, uint32_t hg, uint32_t hb, uint8_t sels[16], uint32_t cur_err) - { - uint32_t block_r[3], block_g[3], block_b[3]; - bc1_get_block_colors3(block_r, block_g, block_b, lr, lg, lb, hr, hg, hb); - - uint32_t total_err = 0; - - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r; - const int g = pSrc_pixels[i].g; - const int b = pSrc_pixels[i].b; - - uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); - uint32_t best_sel = 0; - - uint32_t err1 = squarei((int)block_r[1] - (int)r) + squarei((int)block_g[1] - (int)g) + squarei((int)block_b[1] - (int)b); - if (err1 < best_err) - { - best_err = err1; - best_sel = 1; - } 
- - uint32_t err2 = squarei((int)block_r[2] - (int)r) + squarei((int)block_g[2] - (int)g) + squarei((int)block_b[2] - (int)b); - if (err2 < best_err) - { - best_err = err2; - best_sel = 2; - } - - if (use_black) - { - uint32_t err3 = squarei(r) + squarei(g) + squarei(b); - if (err3 < best_err) - { - best_err = err3; - best_sel = 3; - } - } - - total_err += best_err; - if (total_err >= cur_err) - return total_err; - - sels[i] = (uint8_t)best_sel; - } - - return total_err; - } - - static inline void precise_round_565(const vec3F &xl, const vec3F &xh, - int &trial_lr, int &trial_lg, int &trial_lb, - int &trial_hr, int &trial_hg, int &trial_hb) - { - trial_lr = (int)(xl.c[0] * 31.0f); - trial_lg = (int)(xl.c[1] * 63.0f); - trial_lb = (int)(xl.c[2] * 31.0f); - - trial_hr = (int)(xh.c[0] * 31.0f); - trial_hg = (int)(xh.c[1] * 63.0f); - trial_hb = (int)(xh.c[2] * 31.0f); - - if ((uint32_t)(trial_lr | trial_lb | trial_hr | trial_hb) > 31U) - { - trial_lr = ((uint32_t)trial_lr > 31U) ? (~trial_lr >> 31) & 31 : trial_lr; - trial_hr = ((uint32_t)trial_hr > 31U) ? (~trial_hr >> 31) & 31 : trial_hr; - - trial_lb = ((uint32_t)trial_lb > 31U) ? (~trial_lb >> 31) & 31 : trial_lb; - trial_hb = ((uint32_t)trial_hb > 31U) ? (~trial_hb >> 31) & 31 : trial_hb; - } - - if ((uint32_t)(trial_lg | trial_hg) > 63U) - { - trial_lg = ((uint32_t)trial_lg > 63U) ? (~trial_lg >> 31) & 63 : trial_lg; - trial_hg = ((uint32_t)trial_hg > 63U) ? 
(~trial_hg >> 31) & 63 : trial_hg; - } - - trial_lr = (trial_lr + (xl.c[0] > g_midpoint5[trial_lr])) & 31; - trial_lg = (trial_lg + (xl.c[1] > g_midpoint6[trial_lg])) & 63; - trial_lb = (trial_lb + (xl.c[2] > g_midpoint5[trial_lb])) & 31; - - trial_hr = (trial_hr + (xh.c[0] > g_midpoint5[trial_hr])) & 31; - trial_hg = (trial_hg + (xh.c[1] > g_midpoint6[trial_hg])) & 63; - trial_hb = (trial_hb + (xh.c[2] > g_midpoint5[trial_hb])) & 31; - } - - static inline void precise_round_565_noscale(vec3F xl, vec3F xh, - int &trial_lr, int &trial_lg, int &trial_lb, - int &trial_hr, int &trial_hg, int &trial_hb) - { - xl.c[0] *= 1.0f/255.0f; - xl.c[1] *= 1.0f/255.0f; - xl.c[2] *= 1.0f/255.0f; - - xh.c[0] *= 1.0f/255.0f; - xh.c[1] *= 1.0f/255.0f; - xh.c[2] *= 1.0f/255.0f; - - precise_round_565(xl, xh, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb); - } - - static inline void bc1_encode4(bc1_block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) - { - uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); - - // Always forbid 3 color blocks - if (lc16 == hc16) - { - uint8_t mask = 0; - - // Make l > h - if (hc16 > 0) - hc16--; - else - { - // lc16 = hc16 = 0 - assert(lc16 == hc16 && hc16 == 0); - - hc16 = 0; - lc16 = 1; - mask = 0x55; // select hc16 - } - - assert(lc16 > hc16); - pDst_block->set_low_color(static_cast(lc16)); - pDst_block->set_high_color(static_cast(hc16)); - - pDst_block->m_selectors[0] = mask; - pDst_block->m_selectors[1] = mask; - pDst_block->m_selectors[2] = mask; - pDst_block->m_selectors[3] = mask; - } - else - { - uint8_t invert_mask = 0; - if (lc16 < hc16) - { - std::swap(lc16, hc16); - invert_mask = 0x55; - } - - assert(lc16 > hc16); - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); - - uint32_t packed_sels = 0; - static const uint8_t s_sel_trans[4] = { 0, 2, 3, 1 }; - for (uint32_t i = 0; i < 16; i++) 
- packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); - - pDst_block->m_selectors[0] = (uint8_t)packed_sels ^ invert_mask; - pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; - pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; - pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; - } - } - - static inline void bc1_encode3(bc1_block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) - { - uint32_t lc16 = bc1_block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = bc1_block::pack_unscaled_color(hr, hg, hb); - - bool invert_flag = false; - if (lc16 > hc16) - { - std::swap(lc16, hc16); - invert_flag = true; - } - - assert(lc16 <= hc16); - - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); - - uint32_t packed_sels = 0; - - if (invert_flag) - { - static const uint8_t s_sel_trans_inv[4] = { 1, 0, 2, 3 }; - - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)s_sel_trans_inv[sels[i]] << (i * 2)); - } - else - { - for (uint32_t i = 0; i < 16; i++) - packed_sels |= ((uint32_t)sels[i] << (i * 2)); - } - - pDst_block->m_selectors[0] = (uint8_t)packed_sels; - pDst_block->m_selectors[1] = (uint8_t)(packed_sels >> 8); - pDst_block->m_selectors[2] = (uint8_t)(packed_sels >> 16); - pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24); - } - - struct bc1_encode_results - { - int lr, lg, lb; - int hr, hg, hb; - uint8_t sels[16]; - bool m_3color; - }; - - static bool try_3color_block_useblack(const color32* pSrc_pixels, uint32_t flags, uint32_t &cur_err, bc1_encode_results &results) - { - int total_r = 0, total_g = 0, total_b = 0; - int max_r = 0, max_g = 0, max_b = 0; - int min_r = 255, min_g = 255, min_b = 255; - int total_pixels = 0; - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - if ((r | g | b) < 4) - continue; - - max_r = std::max(max_r, r); 
max_g = std::max(max_g, g); max_b = std::max(max_b, b); - min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); - total_r += r; total_g += g; total_b += b; - - total_pixels++; - } - - if (!total_pixels) - return false; - - int half_total_pixels = total_pixels >> 1; - int avg_r = (total_r + half_total_pixels) / total_pixels; - int avg_g = (total_g + half_total_pixels) / total_pixels; - int avg_b = (total_b + half_total_pixels) / total_pixels; - - uint32_t low_c = 0, high_c = 0; - - int icov[6] = { 0, 0, 0, 0, 0, 0 }; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r; - int g = (int)pSrc_pixels[i].g; - int b = (int)pSrc_pixels[i].b; - - if ((r | g | b) < 4) - continue; - - r -= avg_r; - g -= avg_g; - b -= avg_b; - - icov[0] += r * r; - icov[1] += r * g; - icov[2] += r * b; - icov[3] += g * g; - icov[4] += g * b; - icov[5] += b * b; - } - - float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); - - float xr = (float)(max_r - min_r); - float xg = (float)(max_g - min_g); - float xb = (float)(max_b - min_b); - - if (icov[2] < 0) - xr = -xr; - - if (icov[4] < 0) - xg = -xg; - - for (uint32_t power_iter = 0; power_iter < 4; power_iter++) - { - float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; - float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; - float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; - xr = r; xg = g; xb = b; - } - - float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); - int saxis_r = 306, saxis_g = 601, saxis_b = 117; - if (k >= 2) - { - float m = 1024.0f / k; - saxis_r = (int)(xr * m); - saxis_g = (int)(xg * m); - saxis_b = (int)(xb * m); - } - - int low_dot = INT_MAX, high_dot = INT_MIN; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b; - - if ((r | g | b) < 4) - continue; - - int dot = r * saxis_r + g * saxis_g + b * saxis_b; - if (dot < low_dot) - { - low_dot = dot; - low_c = i; - } - if 
(dot > high_dot) - { - high_dot = dot; - high_c = i; - } - } - - int lr = to_5(pSrc_pixels[low_c].r); - int lg = to_6(pSrc_pixels[low_c].g); - int lb = to_5(pSrc_pixels[low_c].b); - - int hr = to_5(pSrc_pixels[high_c].r); - int hg = to_6(pSrc_pixels[high_c].g); - int hb = to_5(pSrc_pixels[high_c].b); - - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); - - if (trial_err) - { - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t trials = 0; trials < total_ls_passes; trials++) - { - vec3F xl, xh; - int lr2, lg2, lb2, hr2, hg2, hb2; - if (!compute_least_squares_endpoints3_rgb(true, pSrc_pixels, trial_sels, &xl, &xh)) - { - lr2 = g_bc1_match5_half[avg_r].m_hi; - lg2 = g_bc1_match6_half[avg_g].m_hi; - lb2 = g_bc1_match5_half[avg_b].m_hi; - - hr2 = g_bc1_match5_half[avg_r].m_lo; - hg2 = g_bc1_match6_half[avg_g].m_lo; - hb2 = g_bc1_match5_half[avg_b].m_lo; - } - else - { - precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); - } - - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(true, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - lr = lr2; lg = lg2; lb = lb2; - hr = hr2; hg = hg2; hb = hb2; - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - else - break; - } - } - - if (trial_err < cur_err) - { - results.m_3color = true; - results.lr = lr; - results.lg = lg; - results.lb = lb; - results.hr = hr; - results.hg = hg; - results.hb = hb; - memcpy(results.sels, trial_sels, 16); - - cur_err = trial_err; - - return true; - } - - return false; - } - - static bool try_3color_block(const color32* pSrc_pixels, uint32_t flags, uint32_t &cur_err, - int avg_r, int avg_g, int avg_b, int lr, int lg, int lb, int hr, int hg, int hb, 
int total_r, int total_g, int total_b, uint32_t total_orderings_to_try, - bc1_encode_results &results) - { - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels3_fullerr(false, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); - - if (trial_err) - { - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t trials = 0; trials < total_ls_passes; trials++) - { - vec3F xl, xh; - int lr2, lg2, lb2, hr2, hg2, hb2; - if (!compute_least_squares_endpoints3_rgb(false, pSrc_pixels, trial_sels, &xl, &xh)) - { - lr2 = g_bc1_match5_half[avg_r].m_hi; - lg2 = g_bc1_match6_half[avg_g].m_hi; - lb2 = g_bc1_match5_half[avg_b].m_hi; - - hr2 = g_bc1_match5_half[avg_r].m_lo; - hg2 = g_bc1_match6_half[avg_g].m_lo; - hb2 = g_bc1_match5_half[avg_b].m_lo; - } - else - { - precise_round_565(xl, xh, hr2, hg2, hb2, lr2, lg2, lb2); - } - - if ((lr == lr2) && (lg == lg2) && (lb == lb2) && (hr == hr2) && (hg == hg2) && (hb == hb2)) - break; - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, lr2, lg2, lb2, hr2, hg2, hb2, trial_sels2, trial_err); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - lr = lr2; lg = lg2; lb = lb2; - hr = hr2; hg = hg2; hb = hb2; - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - else - break; - } - } - - if ((trial_err) && (flags & cEncodeBC1UseLikelyTotalOrderings) && (total_orderings_to_try)) - { - hist3 h; - for (uint32_t i = 0; i < 16; i++) - { - assert(trial_sels[i] < 3); - h.m_hist[trial_sels[i]]++; - } - - const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); - - int r0, g0, b0, r3, g3, b3; - r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); - r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); - - int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; - - int dots[16]; - for (uint32_t i = 0; i < 16; i++) - { - int r = pSrc_pixels[i].r; - int g = 
pSrc_pixels[i].g; - int b = pSrc_pixels[i].b; - int d = 0x1000000 + (r * ar + g * ag + b * ab); - assert(d >= 0); - dots[i] = (d << 4) + i; - } - - std::sort(dots, dots + 16); - - uint32_t r_sum[17], g_sum[17], b_sum[17]; - uint32_t r = 0, g = 0, b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t p = dots[i] & 15; - - r_sum[i] = r; - g_sum[i] = g; - b_sum[i] = b; - - r += pSrc_pixels[p].r; - g += pSrc_pixels[p].g; - b += pSrc_pixels[p].b; - } - - r_sum[16] = total_r; - g_sum[16] = total_g; - b_sum[16] = total_b; - - const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS3 : std::min(total_orderings_to_try, MAX_TOTAL_ORDERINGS3); - for (uint32_t q = 0; q < q_total; q++) - { - const uint32_t s = (flags & cEncodeBC1Exhaustive) ? q : g_best_total_orderings3[orig_total_order_index][q]; - - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - - if ((s == TOTAL_ORDER_3_0_16) || (s == TOTAL_ORDER_3_1_16) || (s == TOTAL_ORDER_3_2_16)) - { - trial_lr = g_bc1_match5_half[avg_r].m_hi; - trial_lg = g_bc1_match6_half[avg_g].m_hi; - trial_lb = g_bc1_match5_half[avg_b].m_hi; - - trial_hr = g_bc1_match5_half[avg_r].m_lo; - trial_hg = g_bc1_match6_half[avg_g].m_lo; - trial_hb = g_bc1_match5_half[avg_b].m_lo; - } - else - { - compute_least_squares_endpoints3_rgb(&xl, &xh, total_r, total_g, total_b, - g_selector_factors3[s][0], g_selector_factors3[s][1], g_selector_factors3[s][2], s, r_sum, g_sum, b_sum); - - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - uint8_t trial_sels2[16]; - uint32_t trial_err2 = bc1_find_sels3_fullerr(false, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels2, UINT32_MAX); - - if (trial_err2 < trial_err) - { - trial_err = trial_err2; - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - memcpy(trial_sels, trial_sels2, sizeof(trial_sels)); - } - - } // s 
- } - - if (trial_err < cur_err) - { - results.m_3color = true; - results.lr = lr; - results.lg = lg; - results.lb = lb; - results.hr = hr; - results.hg = hg; - results.hb = hb; - memcpy(results.sels, trial_sels, 16); - - cur_err = trial_err; - - return true; - } - - return false; - } - - void encode_bc1(uint32_t level, void* pDst, const uint8_t* pPixels, bool allow_3color, bool allow_transparent_texels_for_black) - { - uint32_t flags = 0, total_orderings4 = 1, total_orderings3 = 1; - - static_assert(MAX_TOTAL_ORDERINGS3 >= 32, "MAX_TOTAL_ORDERINGS3 >= 32"); - static_assert(MAX_TOTAL_ORDERINGS4 >= 32, "MAX_TOTAL_ORDERINGS4 >= 32"); - - switch (level) - { - case 0: - // Faster/higher quality than stb_dxt default. - flags = cEncodeBC1BoundingBoxInt; - break; - case 1: - // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. - flags = cEncodeBC1Use2DLS; - break; - case 2: - // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). - // Slightly stronger than stb_dxt. - flags = 0; - break; - case 3: - // Slightly stronger than stb_dxt HIGHQUAL. - flags = cEncodeBC1TwoLeastSquaresPasses; - break; - case 4: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1Use6PowerIters; - break; - default: - case 5: - // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 6: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - break; - case 7: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 4; - break; - case 8: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFasterMSEEval | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 8; - break; - case 9: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 11; - total_orderings3 = 3; - break; - case 10: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 20; - total_orderings3 = 8; - break; - case 11: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 28; - total_orderings3 = 16; - break; - case 12: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseLikelyTotalOrderings; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 13: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (20 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 14: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - case 15: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (32 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = ((((32 + MAX_TOTAL_ORDERINGS4) / 2) + 32) / 2); - total_orderings3 = 32; - break; - case 16: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = (32 + MAX_TOTAL_ORDERINGS4) / 2; - total_orderings3 = 32; - break; - case 17: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 18: - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = MAX_TOTAL_ORDERINGS4; - total_orderings3 = 32; - break; - case 19: - // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. - flags = cEncodeBC1TwoLeastSquaresPasses | cEncodeBC1UseFullMSEEval | cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use6PowerIters | cEncodeBC1Exhaustive | cEncodeBC1Iterative | (256 << cEncodeBC1EndpointSearchRoundsShift) | cEncodeBC1TryAllInitialEndponts; - flags |= (allow_3color ? cEncodeBC1Use3ColorBlocks : 0) | (allow_transparent_texels_for_black ? 
cEncodeBC1Use3ColorBlocksForBlackPixels : 0); - total_orderings4 = 32; - total_orderings3 = 32; - break; - } - - encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3); - } - - static inline void encode_bc1_pick_initial(const color32 *pSrc_pixels, uint32_t flags, bool grayscale_flag, - int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, - int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, - int &lr, int &lg, int &lb, int &hr, int &hg, int &hb) - { - if (grayscale_flag) - { - const int fr = pSrc_pixels[0].r; - - // Grayscale blocks are a common enough case to specialize. - if ((max_r - min_r) < 2) - { - lr = lb = hr = hb = to_5(fr); - lg = hg = to_6(fr); - } - else - { - lr = lb = to_5(min_r); - lg = to_6(min_r); - - hr = hb = to_5(max_r); - hg = to_6(max_r); - } - } - else if (flags & cEncodeBC1Use2DLS) - { - // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. - int big_chan = 0, min_chan_val = min_r, max_chan_val = max_r; - if ((max_g - min_g) > (max_chan_val - min_chan_val)) - { - big_chan = 1; min_chan_val = min_g; max_chan_val = max_g; - } - if ((max_b - min_b) > (max_chan_val - min_chan_val)) - { - big_chan = 2; min_chan_val = min_b; max_chan_val = max_b; - } - int sum_xy_r = 0, sum_xy_g = 0, sum_xy_b = 0; - vec3F l, h; - if (big_chan == 0) - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += r * r; sum_xy_g += r * g; sum_xy_b += r * b; - } - - int sum_x = total_r; - int sum_x2 = sum_xy_r; - - float div = (float)(16 * sum_x2 - sum_x * sum_x); - float b_y = 0.0f, b_z = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_y = (16 * sum_xy_g - sum_x * total_g) * div; - b_z = (16 * sum_xy_b - sum_x * total_b) * div; - } - - float a_y = (total_g - b_y * sum_x) / 16.0f; - float a_z = (total_b - b_z * sum_x) / 16.0f; - - l.c[1] = a_y + b_y * min_chan_val; - l.c[2] = a_z + b_z * min_chan_val; - 
- h.c[1] = a_y + b_y * max_chan_val; - h.c[2] = a_z + b_z * max_chan_val; - - float dg = (h.c[1] - l.c[1]); - float db = (h.c[2] - l.c[2]); - - h.c[1] = l.c[1] + dg * (15.0f/16.0f); - h.c[2] = l.c[2] + db * (15.0f/16.0f); - - l.c[1] = l.c[1] + dg * (1.0f/16.0f); - l.c[2] = l.c[2] + db * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[0] = fmin_chan_val; - h.c[0] = fmax_chan_val; - } - else if (big_chan == 1) - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += g * r; sum_xy_g += g * g; sum_xy_b += g * b; - } - - int sum_x = total_g; - int sum_x2 = sum_xy_g; - - float div = (float)(16 * sum_x2 - sum_x * sum_x); - float b_x = 0.0f, b_z = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_x = (16 * sum_xy_r - sum_x * total_r) * div; - b_z = (16 * sum_xy_b - sum_x * total_b) * div; - } - - float a_x = (total_r - b_x * sum_x) / 16.0f; - float a_z = (total_b - b_z * sum_x) / 16.0f; - - l.c[0] = a_x + b_x * min_chan_val; - l.c[2] = a_z + b_z * min_chan_val; - - h.c[0] = a_x + b_x * max_chan_val; - h.c[2] = a_z + b_z * max_chan_val; - - float dr = (h.c[0] - l.c[0]); - float db = (h.c[2] - l.c[2]); - - h.c[0] = l.c[0] + dr * (15.0f/16.0f); - h.c[2] = l.c[2] + db * (15.0f/16.0f); - - l.c[0] = l.c[0] + dr * (1.0f/16.0f); - l.c[2] = l.c[2] + db * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[1] = fmin_chan_val; - h.c[1] = fmax_chan_val; - } - else - { - for (uint32_t i = 0; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - sum_xy_r += b * r; sum_xy_g += b * g; sum_xy_b += b * b; - } - - int sum_x = total_b; - int sum_x2 = sum_xy_b; - - float div = (float)(16 * 
sum_x2 - sum_x * sum_x); - float b_x = 0.0f, b_y = 0.0f; - if (fabs(div) > 1e-8f) - { - div = 1.0f / div; - b_x = (16 * sum_xy_r - sum_x * total_r) * div; - b_y = (16 * sum_xy_g - sum_x * total_g) * div; - } - - float a_x = (total_r - b_x * sum_x) / 16.0f; - float a_y = (total_g - b_y * sum_x) / 16.0f; - - l.c[0] = a_x + b_x * min_chan_val; - l.c[1] = a_y + b_y * min_chan_val; - - h.c[0] = a_x + b_x * max_chan_val; - h.c[1] = a_y + b_y * max_chan_val; - - float dr = (h.c[0] - l.c[0]); - float dg = (h.c[1] - l.c[1]); - - h.c[0] = l.c[0] + dr * (15.0f/16.0f); - h.c[1] = l.c[1] + dg * (15.0f/16.0f); - - l.c[0] = l.c[0] + dr * (1.0f/16.0f); - l.c[1] = l.c[1] + dg * (1.0f/16.0f); - - float d = (float)(max_chan_val - min_chan_val); - float fmin_chan_val = min_chan_val + d * (1.0f/16.0f); - float fmax_chan_val = min_chan_val + d * (15.0f/16.0f); - - l.c[2] = fmin_chan_val; - h.c[2] = fmax_chan_val; - } - - precise_round_565_noscale(l, h, lr, lg, lb, hr, hg, hb); - } - else if (flags & cEncodeBC1BoundingBox) - { - // Algorithm from icbc.h compress_dxt1_fast() - vec3F l, h; - l.c[0] = min_r * (1.0f/255.0f); - l.c[1] = min_g * (1.0f/255.0f); - l.c[2] = min_b * (1.0f/255.0f); - - h.c[0] = max_r * (1.0f/255.0f); - h.c[1] = max_g * (1.0f/255.0f); - h.c[2] = max_b * (1.0f/255.0f); - - const float bias = 8.0f / 255.0f; - float inset_r = (h.c[0] - l.c[0] - bias) * (1.0f/16.0f); - float inset_g = (h.c[1] - l.c[1] - bias) * (1.0f/16.0f); - float inset_b = (h.c[2] - l.c[2] - bias) * (1.0f/16.0f); - - l.c[0] = clampf(l.c[0] + inset_r, 0.0f, 1.0f); - l.c[1] = clampf(l.c[1] + inset_g, 0.0f, 1.0f); - l.c[2] = clampf(l.c[2] + inset_b, 0.0f, 1.0f); - - h.c[0] = clampf(h.c[0] - inset_r, 0.0f, 1.0f); - h.c[1] = clampf(h.c[1] - inset_g, 0.0f, 1.0f); - h.c[2] = clampf(h.c[2] - inset_b, 0.0f, 1.0f); - - int icov_xz = 0, icov_yz = 0; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - 
icov_xz += r * b; - icov_yz += g * b; - } - - if (icov_xz < 0) - std::swap(l.c[0], h.c[0]); - - if (icov_yz < 0) - std::swap(l.c[1], h.c[1]); - - precise_round_565(l, h, lr, lg, lb, hr, hg, hb); - } - else if (flags & cEncodeBC1BoundingBoxInt) - { - // Algorithm from icbc.h compress_dxt1_fast(), but converted to integer. - int inset_r = (max_r - min_r - 8) >> 4; - int inset_g = (max_g - min_g - 8) >> 4; - int inset_b = (max_b - min_b - 8) >> 4; - - min_r += inset_r; - min_g += inset_g; - min_b += inset_b; - if ((uint32_t)(min_r | min_g | min_b) > 255U) - { - min_r = clampi(min_r, 0, 255); - min_g = clampi(min_g, 0, 255); - min_b = clampi(min_b, 0, 255); - } - - max_r -= inset_r; - max_g -= inset_g; - max_b -= inset_b; - if ((uint32_t)(max_r | max_g | max_b) > 255U) - { - max_r = clampi(max_r, 0, 255); - max_g = clampi(max_g, 0, 255); - max_b = clampi(max_b, 0, 255); - } - - int icov_xz = 0, icov_yz = 0; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - icov_xz += r * b; - icov_yz += g * b; - } - - int x0 = min_r; - int y0 = min_g; - int x1 = max_r; - int y1 = max_g; - - if (icov_xz < 0) - std::swap(x0, x1); - - if (icov_yz < 0) - std::swap(y0, y1); - - lr = to_5(x0); - lg = to_6(y0); - lb = to_5(min_b); - - hr = to_5(x1); - hg = to_6(y1); - hb = to_5(max_b); - } - else - { - // Select 2 colors along the principle axis. (There must be a faster/simpler way.) 
- uint32_t low_c = 0, high_c = 0; - - int icov[6] = { 0, 0, 0, 0, 0, 0 }; - for (uint32_t i = 0; i < 16; i++) - { - int r = (int)pSrc_pixels[i].r - avg_r; - int g = (int)pSrc_pixels[i].g - avg_g; - int b = (int)pSrc_pixels[i].b - avg_b; - icov[0] += r * r; - icov[1] += r * g; - icov[2] += r * b; - icov[3] += g * g; - icov[4] += g * b; - icov[5] += b * b; - } - - int saxis_r = 306, saxis_g = 601, saxis_b = 117; - - float xr = (float)(max_r - min_r); - float xg = (float)(max_g - min_g); - float xb = (float)(max_b - min_b); - - if (icov[2] < 0) - xr = -xr; - - if (icov[4] < 0) - xg = -xg; - - float cov[6]; - for (uint32_t i = 0; i < 6; i++) - cov[i] = (float)(icov[i]) * (1.0f / 255.0f); - - const uint32_t total_power_iters = (flags & cEncodeBC1Use6PowerIters) ? 6 : 4; - for (uint32_t power_iter = 0; power_iter < total_power_iters; power_iter++) - { - float r = xr * cov[0] + xg * cov[1] + xb * cov[2]; - float g = xr * cov[1] + xg * cov[3] + xb * cov[4]; - float b = xr * cov[2] + xg * cov[4] + xb * cov[5]; - xr = r; xg = g; xb = b; - } - - float k = maximum(fabsf(xr), fabsf(xg), fabsf(xb)); - if (k >= 2) - { - float m = 2048.0f / k; - saxis_r = (int)(xr * m); - saxis_g = (int)(xg * m); - saxis_b = (int)(xb * m); - } - - int low_dot = INT_MAX, high_dot = INT_MIN; - - saxis_r = (int)((uint32_t)saxis_r << 4U); - saxis_g = (int)((uint32_t)saxis_g << 4U); - saxis_b = (int)((uint32_t)saxis_b << 4U); - - for (uint32_t i = 0; i < 16; i += 4) - { - int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i; - int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1; - int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2; - int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3; - - int min_d01 = std::min(dot0, 
dot1); - int max_d01 = std::max(dot0, dot1); - - int min_d23 = std::min(dot2, dot3); - int max_d23 = std::max(dot2, dot3); - - int min_d = std::min(min_d01, min_d23); - int max_d = std::max(max_d01, max_d23); - - low_dot = std::min(low_dot, min_d); - high_dot = std::max(high_dot, max_d); - } - low_c = low_dot & 15; - high_c = high_dot & 15; - - lr = to_5(pSrc_pixels[low_c].r); - lg = to_6(pSrc_pixels[low_c].g); - lb = to_5(pSrc_pixels[low_c].b); - - hr = to_5(pSrc_pixels[high_c].r); - hg = to_6(pSrc_pixels[high_c].g); - hb = to_5(pSrc_pixels[high_c].b); - } - } - - static const int8_t s_adjacent_voxels[16][4] = - { - { 1,0,0, 3 }, // 0 - { 0,1,0, 4 }, // 1 - { 0,0,1, 5 }, // 2 - { -1,0,0, 0 }, // 3 - { 0,-1,0, 1 }, // 4 - { 0,0,-1, 2 }, // 5 - { 1,1,0, 9 }, // 6 - { 1,0,1, 10 }, // 7 - { 0,1,1, 11 }, // 8 - { -1,-1,0, 6 }, // 9 - { -1,0,-1, 7 }, // 10 - { 0,-1,-1, 8 }, // 11 - { -1,1,0, 13 }, // 12 - { 1,-1,0, 12 }, // 13 - { 0,-1,1, 15 }, // 14 - { 0,1,-1, 14 }, // 15 - }; - - // From icbc's high quality mode. 
- static inline void encode_bc1_endpoint_search(const color32 *pSrc_pixels, bool any_black_pixels, - uint32_t flags, bc1_encode_results &results, uint32_t cur_err) - { - int &lr = results.lr, &lg = results.lg, &lb = results.lb, &hr = results.hr, &hg = results.hg, &hb = results.hb; - uint8_t *sels = results.sels; - - int prev_improvement_index = 0, forbidden_direction = -1; - - const int endpoint_search_rounds = (flags & cEncodeBC1EndpointSearchRoundsMask) >> cEncodeBC1EndpointSearchRoundsShift; - for (int i = 0; i < endpoint_search_rounds; i++) - { - assert(s_adjacent_voxels[ s_adjacent_voxels[i & 15][3] ][3] == (i & 15)); - - if (forbidden_direction == (i & 31)) - continue; - - const int8_t delta[3] = { s_adjacent_voxels[i & 15][0], s_adjacent_voxels[i & 15][1], s_adjacent_voxels[i & 15][2] }; - - int trial_lr = lr, trial_lg = lg, trial_lb = lb, trial_hr = hr, trial_hg = hg, trial_hb = hb; - - if ((i >> 4) & 1) - { - trial_lr = clampi(trial_lr + delta[0], 0, 31); - trial_lg = clampi(trial_lg + delta[1], 0, 63); - trial_lb = clampi(trial_lb + delta[2], 0, 31); - } - else - { - trial_hr = clampi(trial_hr + delta[0], 0, 31); - trial_hg = clampi(trial_hg + delta[1], 0, 63); - trial_hb = clampi(trial_hb + delta[2], 0, 31); - } - - uint8_t trial_sels[16]; - - uint32_t trial_err; - if (results.m_3color) - { - trial_err = bc1_find_sels3_fullerr( - ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)), - pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - } - else - { - trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - } - - if (trial_err < cur_err) - { - cur_err = trial_err; - - forbidden_direction = s_adjacent_voxels[i & 15][3] | (i & 16); - - lr = trial_lr; lg = trial_lg; lb = trial_lb; hr = trial_hr; hg = trial_hg; hb = trial_hb; - - memcpy(sels, trial_sels, 16); - - prev_improvement_index = i; - } - - if (i - 
prev_improvement_index > 32) - break; - } - } - - void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try, uint32_t total_orderings_to_try3) - { - assert(g_initialized); - - const color32* pSrc_pixels = (const color32*)pPixels; - bc1_block* pDst_block = static_cast(pDst); - - int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b; - - const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; - - uint32_t j; - for (j = 15; j >= 1; --j) - if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) - break; - - if (j == 0) - { - encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); - return; - } - - int total_r = fr, total_g = fg, total_b = fb; - - max_r = fr; max_g = fg; max_b = fb; - min_r = fr; min_g = fg; min_b = fb; - - uint32_t grayscale_flag = (fr == fg) && (fr == fb); - uint32_t any_black_pixels = (fr | fg | fb) < 4; - - for (uint32_t i = 1; i < 16; i++) - { - const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; - - grayscale_flag &= ((r == g) && (r == b)); - any_black_pixels |= ((r | g | b) < 4); - - max_r = std::max(max_r, r); max_g = std::max(max_g, g); max_b = std::max(max_b, b); - min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); - total_r += r; total_g += g; total_b += b; - } - - avg_r = (total_r + 8) >> 4; avg_g = (total_g + 8) >> 4; avg_b = (total_b + 8) >> 4; - - bc1_encode_results results; - results.m_3color = false; - - uint8_t *sels = results.sels; - int &lr = results.lr, &lg = results.lg, &lb = results.lb, &hr = results.hr, &hg = results.hg, &hb = results.hb; - int orig_lr = 0, orig_lg = 0, orig_lb = 0, orig_hr = 0, orig_hg = 0, orig_hb = 0; - - lr = 0; lg = 0; lb = 0; hr = 0; hg = 0; hb = 0; - - const bool needs_block_error = ((flags & (cEncodeBC1UseLikelyTotalOrderings | cEncodeBC1Use3ColorBlocks | 
cEncodeBC1UseFullMSEEval | cEncodeBC1EndpointSearchRoundsMask)) != 0) || - (any_black_pixels && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)); - - uint32_t cur_err = UINT32_MAX; - - if (!needs_block_error) - { - assert((flags & cEncodeBC1TryAllInitialEndponts) == 0); - - encode_bc1_pick_initial(pSrc_pixels, flags, grayscale_flag != 0, - min_r, min_g, min_b, max_r, max_g, max_b, - avg_r, avg_g, avg_b, total_r, total_g, total_b, - lr, lg, lb, hr, hg, hb); - - orig_lr = lr; orig_lg = lg; orig_lb = lb; orig_hr = hr; orig_hg = hg; orig_hb = hb; - - bc1_find_sels4_noerr(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); - - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) - { - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, sels, &xl, &xh, total_r, total_g, total_b)) - { - // All selectors equal - treat it as a solid block which should always be equal or better. - trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - - // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. 
- } - else - { - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - if ((lr == trial_lr) && (lg == trial_lg) && (lb == trial_lb) && (hr == trial_hr) && (hg == trial_hg) && (hb == trial_hb)) - break; - - bc1_find_sels4_noerr(pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, sels); - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - } // ls_pass - } - else - { - const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; - for (uint32_t round = 0; round < total_rounds; round++) - { - uint32_t modified_flags = flags; - if (round == 1) - { - modified_flags &= ~(cEncodeBC1Use2DLS | cEncodeBC1BoundingBox); - modified_flags |= cEncodeBC1BoundingBox; - } - - int round_lr, round_lg, round_lb, round_hr, round_hg, round_hb; - uint8_t round_sels[16]; - - encode_bc1_pick_initial(pSrc_pixels, modified_flags, grayscale_flag != 0, - min_r, min_g, min_b, max_r, max_g, max_b, - avg_r, avg_g, avg_b, total_r, total_g, total_b, - round_lr, round_lg, round_lb, round_hr, round_hg, round_hb); - - int orig_round_lr = round_lr, orig_round_lg = round_lg, orig_round_lb = round_lb, orig_round_hr = round_hr, orig_round_hg = round_hg, orig_round_hb = round_hb; - - uint32_t round_err = bc1_find_sels4(flags, pSrc_pixels, round_lr, round_lg, round_lb, round_hr, round_hg, round_hb, round_sels, UINT32_MAX); - - const uint32_t total_ls_passes = flags & cEncodeBC1TwoLeastSquaresPasses ? 2 : 1; - for (uint32_t ls_pass = 0; ls_pass < total_ls_passes; ls_pass++) - { - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - if (!compute_least_squares_endpoints4_rgb(pSrc_pixels, round_sels, &xl, &xh, total_r, total_g, total_b)) - { - // All selectors equal - treat it as a solid block which should always be equal or better. 
- trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - - // In high/higher quality mode, let it try again in case the optimal tables have caused the sels to diverge. - } - else - { - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - if ((round_lr == trial_lr) && (round_lg == trial_lg) && (round_lb == trial_lb) && (round_hr == trial_hr) && (round_hg == trial_hg) && (round_hb == trial_hb)) - break; - - uint8_t trial_sels[16]; - uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, round_err); - - if (trial_err < round_err) - { - round_lr = trial_lr; - round_lg = trial_lg; - round_lb = trial_lb; - - round_hr = trial_hr; - round_hg = trial_hg; - round_hb = trial_hb; - - round_err = trial_err; - memcpy(round_sels, trial_sels, 16); - } - else - break; - - } // ls_pass - - if (round_err <= cur_err) - { - cur_err = round_err; - - lr = round_lr; - lg = round_lg; - lb = round_lb; - hr = round_hr; - hg = round_hg; - hb = round_hb; - - orig_lr = orig_round_lr; - orig_lg = orig_round_lg; - orig_lb = orig_round_lb; - orig_hr = orig_round_hr; - orig_hg = orig_round_hg; - orig_hb = orig_round_hb; - - memcpy(sels, round_sels, 16); - } - - } // round - } - - if ((cur_err) && (flags & cEncodeBC1UseLikelyTotalOrderings)) - { - assert(needs_block_error); - - const uint32_t total_iters = (flags & cEncodeBC1Iterative) ? 
2 : 1; - for (uint32_t iter_index = 0; iter_index < total_iters; iter_index++) - { - const uint32_t orig_err = cur_err; - - hist4 h; - for (uint32_t i = 0; i < 16; i++) - { - assert(sels[i] < 4); - h.m_hist[sels[i]]++; - } - - const uint32_t orig_total_order_index = h.lookup_total_ordering_index(); - - int r0, g0, b0, r3, g3, b3; - r0 = (lr << 3) | (lr >> 2); g0 = (lg << 2) | (lg >> 4); b0 = (lb << 3) | (lb >> 2); - r3 = (hr << 3) | (hr >> 2); g3 = (hg << 2) | (hg >> 4); b3 = (hb << 3) | (hb >> 2); - - int ar = r3 - r0, ag = g3 - g0, ab = b3 - b0; - - int dots[16]; - for (uint32_t i = 0; i < 16; i++) - { - int r = pSrc_pixels[i].r; - int g = pSrc_pixels[i].g; - int b = pSrc_pixels[i].b; - int d = 0x1000000 + (r * ar + g * ag + b * ab); - assert(d >= 0); - dots[i] = (d << 4) + i; - } - - std::sort(dots, dots + 16); - - uint32_t r_sum[17], g_sum[17], b_sum[17]; - uint32_t r = 0, g = 0, b = 0; - for (uint32_t i = 0; i < 16; i++) - { - const uint32_t p = dots[i] & 15; - - r_sum[i] = r; - g_sum[i] = g; - b_sum[i] = b; - - r += pSrc_pixels[p].r; - g += pSrc_pixels[p].g; - b += pSrc_pixels[p].b; - } - - r_sum[16] = total_r; - g_sum[16] = total_g; - b_sum[16] = total_b; - - const uint32_t q_total = (flags & cEncodeBC1Exhaustive) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : clampi(total_orderings_to_try, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4); - for (uint32_t q = 0; q < q_total; q++) - { - const uint32_t s = (flags & cEncodeBC1Exhaustive) ? 
q : g_best_total_orderings4[orig_total_order_index][q]; - - int trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb; - - vec3F xl, xh; - - if ((s == TOTAL_ORDER_4_0_16) || (s == TOTAL_ORDER_4_1_16) || (s == TOTAL_ORDER_4_2_16) || (s == TOTAL_ORDER_4_3_16)) - { - trial_lr = g_bc1_match5_equals_1[avg_r].m_hi; - trial_lg = g_bc1_match6_equals_1[avg_g].m_hi; - trial_lb = g_bc1_match5_equals_1[avg_b].m_hi; - - trial_hr = g_bc1_match5_equals_1[avg_r].m_lo; - trial_hg = g_bc1_match6_equals_1[avg_g].m_lo; - trial_hb = g_bc1_match5_equals_1[avg_b].m_lo; - } - else - { - compute_least_squares_endpoints4_rgb(&xl, &xh, total_r, total_g, total_b, - g_selector_factors4[s][0], g_selector_factors4[s][1], g_selector_factors4[s][2], s, r_sum, g_sum, b_sum); - - precise_round_565(xl, xh, trial_hr, trial_hg, trial_hb, trial_lr, trial_lg, trial_lb); - } - - uint8_t trial_sels[16]; - - uint32_t trial_err = bc1_find_sels4(flags, pSrc_pixels, trial_lr, trial_lg, trial_lb, trial_hr, trial_hg, trial_hb, trial_sels, cur_err); - - if (trial_err < cur_err) - { - cur_err = trial_err; - - lr = trial_lr; - lg = trial_lg; - lb = trial_lb; - - hr = trial_hr; - hg = trial_hg; - hb = trial_hb; - - memcpy(sels, trial_sels, 16); - } - - } // s - - if ((!cur_err) || (cur_err == orig_err)) - break; - - } // iter_index - } - - if ( ((flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0) && (cur_err) ) - { - if (flags & cEncodeBC1Use3ColorBlocks) - { - assert(needs_block_error); - try_3color_block(pSrc_pixels, flags, cur_err, avg_r, avg_g, avg_b, orig_lr, orig_lg, orig_lb, orig_hr, orig_hg, orig_hb, total_r, total_g, total_b, total_orderings_to_try3, results); - } - - if ((any_black_pixels) && ((flags & cEncodeBC1Use3ColorBlocksForBlackPixels) != 0)) - { - assert(needs_block_error); - try_3color_block_useblack(pSrc_pixels, flags, cur_err, results); - } - } - - if ( (flags & cEncodeBC1EndpointSearchRoundsMask) && (cur_err) ) - { - assert(needs_block_error); - - 
encode_bc1_endpoint_search(pSrc_pixels, any_black_pixels != 0, flags, results, cur_err); - } - - if (results.m_3color) - bc1_encode3(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); - else - bc1_encode4(pDst_block, results.lr, results.lg, results.lb, results.hr, results.hg, results.hb, results.sels); - } - - // BC3-5 - - struct bc4_block - { - enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; - uint8_t m_endpoints[2]; - - uint8_t m_selectors[cTotalSelectorBytes]; - - inline uint32_t get_low_alpha() const { return m_endpoints[0]; } - inline uint32_t get_high_alpha() const { return m_endpoints[1]; } - inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - - inline uint64_t get_selector_bits() const - { - return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | - (((uint64_t)m_selectors[4]) << 32U) | - (((uint64_t)m_selectors[5]) << 40U); - } - - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const - { - assert((x < 4U) && (y < 4U)); - return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits))& (cMaxSelectorValues - 1); - } - - static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h) - { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 4 + h) / 5); - pDst[3] = static_cast((l * 3 + h * 2) / 5); - pDst[4] = static_cast((l * 2 + h * 3) / 5); - pDst[5] = static_cast((l + h * 4) / 5); - pDst[6] = 0; - pDst[7] = 255; - return 6; - } - - static inline uint32_t get_block_values8(uint8_t* pDst, uint32_t l, uint32_t h) - { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 6 + h) / 7); - pDst[3] = static_cast((l * 5 + h * 2) / 7); - pDst[4] = static_cast((l * 4 + h * 3) / 7); - pDst[5] = static_cast((l * 3 + h * 4) / 7); - pDst[6] = static_cast((l * 
2 + h * 5) / 7); - pDst[7] = static_cast((l + h * 6) / 7); - return 8; - } - - static inline uint32_t get_block_values(uint8_t* pDst, uint32_t l, uint32_t h) - { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); - } - }; - - void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) - { - assert(g_initialized); - - uint32_t min0_v, max0_v, min1_v, max1_v, min2_v, max2_v, min3_v, max3_v; - - { - min0_v = max0_v = pPixels[0 * stride]; - min1_v = max1_v = pPixels[1 * stride]; - min2_v = max2_v = pPixels[2 * stride]; - min3_v = max3_v = pPixels[3 * stride]; - } - - { - uint32_t v0 = pPixels[4 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[5 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[6 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[7 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - { - uint32_t v0 = pPixels[8 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[9 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[10 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[11 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - { - uint32_t v0 = pPixels[12 * stride]; min0_v = std::min(min0_v, v0); max0_v = std::max(max0_v, v0); - uint32_t v1 = pPixels[13 * stride]; min1_v = std::min(min1_v, v1); max1_v = std::max(max1_v, v1); - uint32_t v2 = pPixels[14 * stride]; min2_v = std::min(min2_v, v2); max2_v = std::max(max2_v, v2); - uint32_t v3 = pPixels[15 * stride]; min3_v = std::min(min3_v, v3); max3_v = std::max(max3_v, v3); - } - - const uint32_t min_v = minimum(min0_v, min1_v, min2_v, min3_v); - const uint32_t max_v = maximum(max0_v, max1_v, max2_v, max3_v); - - uint8_t* 
pDst_bytes = static_cast(pDst); - pDst_bytes[0] = (uint8_t)max_v; - pDst_bytes[1] = (uint8_t)min_v; - - if (max_v == min_v) - { - memset(pDst_bytes + 2, 0, 6); - return; - } - - const uint32_t delta = max_v - min_v; - - // min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors. - const int t0 = delta * 13; - const int t1 = delta * 11; - const int t2 = delta * 9; - const int t3 = delta * 7; - const int t4 = delta * 5; - const int t5 = delta * 3; - const int t6 = delta * 1; - - // BC4 floors in its divisions, which we compensate for with the 4 bias. - // This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one). - const int bias = 4 - min_v * 14; - - static const uint32_t s_tran0[8] = { 1U , 7U , 6U , 5U , 4U , 3U , 2U , 0U }; - static const uint32_t s_tran1[8] = { 1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U }; - static const uint32_t s_tran2[8] = { 1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U }; - static const uint32_t s_tran3[8] = { 1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U }; - - uint64_t a0, a1, a2, a3; - { - const int v0 = pPixels[0 * stride] * 14 + bias; - const int v1 = pPixels[1 * stride] * 14 + bias; - const int v2 = pPixels[2 * stride] * 14 + bias; - const int v3 = pPixels[3 * stride] * 14 + bias; - a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]; - a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]; - a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]; - a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]; - } - - { - const int v0 = pPixels[4 * stride] * 14 + bias; - const int v1 = pPixels[5 * stride] * 
14 + bias; - const int v2 = pPixels[6 * stride] * 14 + bias; - const int v3 = pPixels[7 * stride] * 14 + bias; - a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U); - a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U); - a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); - a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); - } - - { - const int v0 = pPixels[8 * stride] * 14 + bias; - const int v1 = pPixels[9 * stride] * 14 + bias; - const int v2 = pPixels[10 * stride] * 14 + bias; - const int v3 = pPixels[11 * stride] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U); - } - - { - const int v0 = pPixels[12 * stride] * 14 + bias; - const int v1 = pPixels[13 * stride] * 14 + bias; - const int v2 = pPixels[14 * stride] * 14 + bias; - const int v3 = pPixels[15 * stride] * 14 + bias; - a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U); - a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U); - a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U); - a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= 
t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U); - } - - const uint64_t f = a0 | a1 | a2 | a3; - - pDst_bytes[2] = (uint8_t)f; - pDst_bytes[3] = (uint8_t)(f >> 8U); - pDst_bytes[4] = (uint8_t)(f >> 16U); - pDst_bytes[5] = (uint8_t)(f >> 24U); - pDst_bytes[6] = (uint8_t)(f >> 32U); - pDst_bytes[7] = (uint8_t)(f >> 40U); - } - - void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try) - { - assert(g_initialized); - - // 3-color blocks are not allowed with BC3 (on most GPU's). - flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks); - - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(static_cast(pDst) + 8, pPixels, flags, total_orderings_to_try); - } - - void encode_bc3(uint32_t level, void* pDst, const uint8_t* pPixels) - { - assert(g_initialized); - - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); - } - - void encode_bc5(void* pDst, const uint8_t* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) - { - assert(g_initialized); - - encode_bc4(pDst, pPixels + chan0, stride); - encode_bc4(static_cast(pDst) + 8, pPixels + chan1, stride); - } - - // Returns true if the block uses 3 color punchthrough alpha mode. 
- bool unpack_bc1(const void* pBlock_bits, void* pPixels, bool set_alpha, bc1_approx_mode mode) - { - color32* pDst_pixels = static_cast(pPixels); - - static_assert(sizeof(bc1_block) == 8, "sizeof(bc1_block) == 8"); - static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); - - const bc1_block* pBlock = static_cast(pBlock_bits); - - const uint32_t l = pBlock->get_low_color(); - const uint32_t h = pBlock->get_high_color(); - - color32 c[4]; - - const int cr0 = (l >> 11) & 31; - const int cg0 = (l >> 5) & 63; - const int cb0 = l & 31; - const int r0 = (cr0 << 3) | (cr0 >> 2); - const int g0 = (cg0 << 2) | (cg0 >> 4); - const int b0 = (cb0 << 3) | (cb0 >> 2); - - const int cr1 = (h >> 11) & 31; - const int cg1 = (h >> 5) & 63; - const int cb1 = h & 31; - const int r1 = (cr1 << 3) | (cr1 >> 2); - const int g1 = (cg1 << 2) | (cg1 >> 4); - const int b1 = (cb1 << 3) | (cb1 >> 2); - - bool used_punchthrough = false; - - if (l > h) - { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); - switch (mode) - { - case bc1_approx_mode::cBC1Ideal: - c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); - break; - case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); - c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); - c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); - 
break; - } - } - else - { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); - switch (mode) - { - case bc1_approx_mode::cBC1Ideal: - case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); - break; - case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); - break; - case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); - break; - } - - c[3].set_noclamp_rgba(0, 0, 0, 0); - used_punchthrough = true; - } - - if (set_alpha) - { - for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) - { - pDst_pixels[0] = c[pBlock->get_selector(0, y)]; - pDst_pixels[1] = c[pBlock->get_selector(1, y)]; - pDst_pixels[2] = c[pBlock->get_selector(2, y)]; - pDst_pixels[3] = c[pBlock->get_selector(3, y)]; - } - } - else - { - for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) - { - pDst_pixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pDst_pixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pDst_pixels[2].set_rgb(c[pBlock->get_selector(2, y)]); - pDst_pixels[3].set_rgb(c[pBlock->get_selector(3, y)]); - } - } - - return used_punchthrough; - } - - void unpack_bc4(const void* pBlock_bits, uint8_t* pPixels, uint32_t stride) - { - static_assert(sizeof(bc4_block) == 8, "sizeof(bc4_block) == 8"); - - const bc4_block* pBlock = static_cast(pBlock_bits); - - uint8_t sel_values[8]; - bc4_block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); - - const uint64_t selector_bits = pBlock->get_selector_bits(); - - for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) - { - pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; - pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)]; - pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; - 
pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; - } - } - - // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. - bool unpack_bc3(const void* pBlock_bits, void* pPixels, bc1_approx_mode mode) - { - color32* pDst_pixels = static_cast(pPixels); - - bool success = true; - - if (unpack_bc1((const uint8_t*)pBlock_bits + sizeof(bc4_block), pDst_pixels, true, mode)) - success = false; - - unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(color32)); - - return success; - } - - // writes RG - void unpack_bc5(const void* pBlock_bits, void* pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) - { - unpack_bc4(pBlock_bits, (uint8_t *)pPixels + chan0, stride); - unpack_bc4((const uint8_t*)pBlock_bits + sizeof(bc4_block), (uint8_t *)pPixels + chan1, stride); - } - -} // namespace rgbcx +} +#endif // #ifndef RGBCX_INCLUDE_H +#ifdef RGBCX_IMPLEMENTATION #endif //#ifdef RGBCX_IMPLEMENTATION /* diff --git a/libkram/bc7enc/rgbcx_table4_small.h b/libkram/bc7enc/rgbcx_table4_small.h new file mode 100644 index 00000000..30ccc709 --- /dev/null +++ b/libkram/bc7enc/rgbcx_table4_small.h @@ -0,0 +1,969 @@ +{ 202, 120, 13, 318, 15, 23, 403, 450, 5, 51, 260, 128, 77, 21, 33, 494, 515, 523, 4, 141, 269, 1, 2, 700, 137, 49, 48, 102, 7, 64, 753, 82 }, +{ 13,141,23,217,115,51,77,2,64,21,0,4,5,317,137,269,202,33,318,7,291,352,9,10,3,180,32,6,365,102,341,349 }, +{ 29,58,262,1,52,74,6,171,5,287,151,334,27,500,75,26,331,223,53,635,220,19,50,45,46,17,14,396,163,409,324,70 }, +{ 40,51,33,453,14,23,62,56,12,196,730,475,153,99,403,775,117,130,585,34,4,17,162,11,139,57,102,38,108,47,123,440 }, +{ 33,23,51,13,102,64,202,128,12,40,15,196,153,10,1,2,77,99,141,0,515,5,117,3,120,403,700,165,22,14,269,453 }, +{ 13,23,51,4,77,141,202,33,115,64,32,128,0,11,177,40,15,102,2,217,7,137,269,21,90,59,515,1,180,403,22,6 }, +{ 
26,235,19,47,648,624,78,145,27,112,122,64,444,6,630,453,25,42,65,130,711,85,390,113,416,108,665,29,730,138,644,95 }, +{ 64,141,352,751,217,247,237,437,177,269,86,954,947,875,32,318,95,77,304,92,597,180,232,291,128,864,349,588,372,202,312,1 }, +{ 642,898,180,638,901,341,82,197,10,951,15,515,165,762,700,253,811,753,752,365,143,479,244,569,8,110,351,873,55,31,499,116 }, +{ 221,23,51,125,438,254,13,21,39,49,308,656,0,115,530,159,158,401,30,166,912,386,165,688,518,9,105,627,424,22,421,33 }, +{ 143,31,1,44,197,8,180,125,116,55,13,498,23,341,638,242,93,15,2,141,0,901,752,115,36,206,165,479,338,365,515,762 }, +{ 12,23,51,13,14,15,37,99,515,38,700,117,2,196,134,153,753,64,54,33,128,120,21,0,328,5,139,82,453,719,457,1 }, +{ 13,15,23,515,961,700,457,753,51,115,4,165,197,2,38,569,1,474,0,37,99,719,5,12,629,14,11,3,33,77,64,10 }, +{ 15,515,700,753,1,0,2,4,3,23,134,12,961,5,10,197,11,33,82,120,457,51,165,7,6,341,217,21,77,9,40,180 }, +{ 13,51,23,457,719,961,730,401,165,453,0,117,386,15,134,1,758,153,12,54,515,99,11,2,700,5,753,4,308,33,6,899 }, +{ 134,898,82,117,13,33,77,102,23,260,341,351,120,901,197,153,961,111,196,110,180,457,854,10,450,8,165,40,4,115,0,365 }, +{ 60,18,126,167,35,16,191,71,24,92,121,271,68,107,212,146,118,150,199,7,21,1,9,575,727,5,566,48,0,132,108,273 }, +{ 62,136,129,123,128,41,162,17,249,211,214,789,618,710,38,678,248,507,57,64,152,269,119,3,177,183,597,106,4,179,216,90 }, +{ 403,523,51,475,494,453,817,899,202,23,450,13,421,120,102,730,33,128,4,1,805,5,7,153,757,260,318,196,77,457,326,65 }, +{ 4,59,3,62,12,33,56,193,27,21,102,17,40,77,76,84,32,0,6,123,119,177,128,11,18,611,605,25,13,51,73,210 }, +{ 43,20,319,422,414,945,0,7,819,61,5,376,325,173,804,904,470,693,97,707,14,49,22,104,147,107,95,32,426,1,330,577 }, +{ 13,23,51,2,0,115,4,141,217,33,10,77,1,15,64,180,3,515,7,6,22,102,11,5,40,9,165,700,202,197,317,341 }, +{ 28,49,0,105,1,24,65,159,35,55,95,239,16,2,109,7,9,14,170,320,347,168,424,158,10,301,124,5,67,21,64,36 }, +{ 
15,515,700,753,0,1,13,2,117,4,12,10,5,165,457,3,9,134,11,7,6,51,77,64,961,82,33,197,14,341,120,141 }, +{ 7,71,14,149,97,18,60,16,150,92,398,189,140,124,24,273,35,2,69,302,154,68,0,336,517,43,66,28,118,251,230,1 }, +{ 4,102,33,77,40,59,11,624,210,12,128,342,5,503,91,139,64,32,25,494,202,678,416,0,403,275,21,450,196,318,523,177 }, +{ 25,19,42,6,122,813,256,235,85,26,436,53,297,573,680,390,445,63,27,416,80,233,65,73,389,283,45,605,194,17,250,343 }, +{ 402,102,202,128,33,300,403,23,12,77,40,21,342,117,483,99,25,494,6,4,63,32,84,569,139,757,475,318,19,26,196,134 }, +{ 158,9,0,109,39,49,65,22,35,168,55,24,68,124,159,16,185,344,333,154,254,272,175,289,1,577,95,28,105,810,30,169 }, +{ 197,180,115,237,498,165,2,5,287,546,400,3,61,34,509,13,297,80,341,52,45,186,58,881,23,873,468,176,64,17,311,250 }, +{ 120,968,373,260,704,110,450,202,137,318,77,95,269,326,217,717,661,652,851,349,93,1,518,98,827,291,21,177,82,33,848,719 }, +{ 44,116,144,268,434,489,367,384,98,127,918,93,948,31,206,940,855,0,203,137,9,22,617,141,332,105,393,492,959,282,299,131 }, +{ 13,77,23,33,51,0,64,141,102,4,2,115,1,6,202,15,10,128,269,7,177,180,3,40,22,11,515,217,117,318,700,137 }, +{ 15,515,700,753,4,11,141,40,165,23,64,180,13,202,32,3,51,125,5,197,21,128,0,93,77,1,120,82,269,117,110,59 }, +{ 176,231,585,62,34,14,412,161,56,236,527,57,17,3,51,202,4,23,369,283,128,13,472,440,84,361,136,457,381,130,719,53 }, +{ 9,0,180,217,237,101,141,352,88,100,230,64,175,317,115,498,68,39,30,1,702,83,213,36,365,208,752,13,252,321,952,546 }, +{ 28,9,22,1,49,0,109,39,83,95,86,30,13,105,128,55,141,168,158,67,31,159,208,12,96,5,185,2,160,64,137,23 }, +{ 72,4,38,12,51,89,477,11,57,76,401,308,23,474,99,148,413,179,59,13,431,152,54,569,17,3,205,629,197,421,405,15 }, +{ 457,13,23,961,15,51,515,700,165,12,753,629,11,1,719,117,0,3,2,37,569,197,40,328,33,5,153,134,99,64,38,196 }, +{ 254,100,310,9,30,1,39,625,166,265,190,0,272,557,131,731,31,98,578,688,404,93,101,88,49,21,127,264,44,36,252,478 }, +{ 
51,23,12,13,15,128,99,120,10,202,515,153,64,82,700,33,165,2,5,117,403,1,141,0,3,196,37,453,753,197,260,93 }, +{ 38,99,542,139,453,117,196,23,457,13,328,111,37,134,961,11,12,51,40,775,587,401,474,54,153,477,41,629,33,475,14,277 }, +{ 6,85,25,233,343,91,26,63,138,29,19,65,283,4,81,235,42,122,605,64,648,256,174,370,74,389,718,59,45,194,445,416 }, +{ 49,5,97,20,197,21,18,193,0,64,408,729,173,350,43,422,165,7,14,104,61,32,509,713,523,102,120,95,125,397,35,232 }, +{ 144,116,268,434,384,489,367,206,93,855,940,44,98,332,617,127,959,911,137,282,203,31,22,219,141,9,131,276,417,0,1,120 }, +{ 17,106,64,62,32,255,136,292,476,162,129,241,123,141,41,237,720,214,209,352,519,211,186,148,752,247,507,90,21,77,197,119 }, +{ 2,29,52,50,5,58,14,6,27,1,366,357,45,53,17,19,171,151,26,181,133,38,218,764,287,583,61,113,3,487,600,281 }, +{ 130,59,196,412,381,730,711,236,77,210,202,402,453,99,401,108,361,803,291,283,153,4,57,51,128,183,14,719,503,117,23,11 }, +{ 13,23,51,141,77,4,33,64,115,0,217,10,180,202,2,102,11,9,15,165,40,21,128,352,22,7,197,3,317,515,269,1 }, +{ 23,13,202,51,120,15,21,5,141,1,128,269,137,515,64,102,125,48,98,33,260,523,318,93,700,165,450,77,2,12,403,82 }, +{ 1,2,14,46,29,67,38,52,5,171,58,24,103,69,96,70,83,181,54,75,163,223,16,45,112,309,155,0,186,35,18,108 }, +{ 15,515,700,753,13,0,1,2,153,5,23,10,117,3,9,7,134,165,12,6,341,33,4,14,77,457,115,21,719,180,217,82 }, +{ 197,165,509,13,391,180,308,115,23,546,5,498,2,29,3,401,901,61,34,80,14,457,250,569,237,873,38,297,45,15,468,386 }, +{ 19,73,27,250,200,714,444,472,26,53,34,17,813,322,283,390,128,297,78,123,432,14,436,136,106,690,57,122,389,80,503,3 }, +{ 3,17,21,45,62,32,38,12,155,14,2,328,5,99,401,536,828,13,227,488,106,51,719,119,540,76,165,221,115,629,209,41 }, +{ 115,341,873,197,365,13,901,180,569,752,317,1,10,498,143,634,261,0,509,15,943,237,44,31,116,601,165,127,282,23,141,64 }, +{ 453,51,23,403,33,421,475,102,15,153,196,515,13,700,117,523,12,40,753,21,4,134,0,494,670,899,22,801,730,10,11,401 }, +{ 
23,13,51,33,12,117,153,134,453,196,15,99,515,40,14,700,128,102,11,753,77,64,403,202,0,401,475,37,65,2,3,38 }, +{ 2,7,5,14,70,1,29,61,52,45,6,112,66,16,21,32,592,46,38,135,87,58,186,315,290,128,113,0,64,48,227,23 }, +{ 33,23,102,51,128,13,64,202,141,1,77,10,153,40,196,117,2,3,0,5,15,269,403,12,137,134,318,165,120,6,453,99 }, +{ 16,92,7,20,43,35,126,71,60,14,107,18,68,97,0,121,279,149,24,246,191,48,118,575,55,140,362,783,230,150,375,566 }, +{ 13,23,4,33,77,64,51,102,141,128,32,10,0,202,40,115,59,22,90,11,177,21,291,6,7,318,180,117,137,2,95,165 }, +{ 507,162,129,41,4,211,62,38,123,59,57,248,183,130,99,11,3,361,202,17,402,556,266,305,803,210,128,184,152,136,313,117 }, +{ 643,123,193,650,802,18,25,389,718,256,65,289,84,91,619,511,415,90,235,63,57,510,324,216,862,102,6,183,108,397,217,736 }, +{ 13,23,15,1,515,51,0,2,700,5,753,165,141,115,12,3,4,180,21,197,457,7,6,10,120,9,33,202,77,32,8,11 }, +{ 23,51,13,453,64,403,12,21,5,202,128,475,165,141,523,95,125,115,3,1,4,730,120,32,2,494,180,719,457,197,450,401 }, +{ 204,74,135,66,6,174,192,7,138,172,85,353,348,580,280,97,95,500,29,64,426,32,87,889,65,81,25,2,52,43,568,673 }, +{ 35,0,68,69,24,9,1,16,65,103,149,133,18,114,28,50,83,2,189,7,46,14,101,336,175,124,251,55,71,218,38,238 }, +{ 16,101,0,118,9,18,24,68,35,154,71,124,60,212,191,520,55,806,694,167,28,39,364,375,1,346,252,65,604,302,22,21 }, +{ 0,9,16,35,1,24,68,18,65,21,103,67,13,149,28,189,71,23,101,238,114,7,335,133,486,141,22,212,48,50,30,118 }, +{ 13,202,23,77,33,51,128,5,21,141,115,32,102,64,4,0,318,269,10,15,291,2,494,177,11,217,3,515,22,137,6,700 }, +{ 16,92,60,35,7,18,24,68,150,149,14,71,0,375,97,126,118,107,230,191,246,273,140,55,175,653,9,575,2,28,566,517 }, +{ 76,90,21,179,316,148,205,32,464,288,184,257,245,1,89,2,460,57,152,45,38,358,645,5,12,449,350,48,37,17,4,14 }, +{ 19,27,26,813,80,297,17,495,436,53,73,200,4,378,250,59,106,25,45,128,361,42,113,469,122,390,77,40,736,6,11,136 }, +{ 
6,26,235,138,19,145,112,70,331,262,25,42,52,624,27,453,122,47,500,78,648,85,29,2,630,632,409,113,50,226,108,75 }, +{ 7,16,14,24,92,35,18,2,46,9,60,140,0,87,50,5,54,13,12,38,171,23,126,21,58,64,1,70,128,71,220,163 }, +{ 90,205,257,184,32,179,460,5,245,45,2,288,769,524,57,21,152,229,17,1,497,4,292,59,619,452,432,76,476,11,266,14 }, +{ 15,515,700,753,4,5,11,141,13,1,33,3,0,128,202,23,180,21,2,64,269,32,117,134,120,40,102,318,153,17,137,352 }, +{ 47,130,711,108,453,412,730,196,390,283,78,27,51,183,381,236,128,200,719,14,153,472,503,34,59,250,3,4,57,803,123,432 }, +{ 12,277,51,474,111,153,23,99,13,37,961,94,629,542,569,431,79,139,38,134,117,453,33,188,196,40,115,15,11,157,401,515 }, +{ 17,495,469,106,26,378,80,27,161,483,19,742,527,436,383,862,73,136,53,814,297,6,119,84,62,56,25,3,209,611,4,128 }, +{ 81,681,636,91,0,750,370,104,718,138,18,693,173,784,29,397,348,74,192,673,174,65,6,207,64,280,306,52,671,32,355,319 }, +{ 15,515,700,753,33,77,4,102,115,117,40,13,1,153,134,11,5,217,23,196,2,21,3,317,32,365,0,341,291,59,12,51 }, +{ 0,9,28,35,68,1,65,67,101,39,69,175,16,238,13,22,96,124,18,24,251,30,55,12,23,2,50,141,114,5,154,103 }, +{ 23,33,77,13,117,40,11,102,64,4,51,403,153,453,10,0,196,134,128,65,12,291,86,99,95,59,15,141,202,180,137,719 }, +{ 214,90,289,6,874,64,25,65,235,42,751,249,256,312,194,85,746,875,174,32,525,288,519,835,247,348,233,544,217,524,437,352 }, +{ 1,22,2,0,36,67,28,5,49,95,12,50,168,83,105,55,7,9,14,194,103,23,114,21,584,46,10,13,38,69,208,159 }, +{ 269,141,13,202,33,180,318,77,291,137,102,352,128,23,349,51,31,217,372,317,125,197,44,21,11,5,901,1,18,0,4,494 }, +{ 435,144,274,88,203,418,30,1,190,410,96,778,100,530,521,326,466,795,686,166,960,321,382,264,367,822,131,31,692,9,213,93 }, +{ 76,72,90,21,37,179,12,205,32,428,148,38,308,405,4,413,57,184,749,245,316,221,54,645,288,1,152,155,464,257,2,14 }, +{ 77,33,64,102,13,141,23,2,40,1,51,10,0,115,6,180,202,128,4,3,177,269,15,7,22,165,291,14,217,318,137,11 }, +{ 
397,81,4,32,65,788,693,804,681,11,249,21,91,64,690,494,3,0,422,56,348,725,194,123,23,59,523,319,61,510,95,90 }, +{ 60,126,16,7,92,121,314,246,35,107,150,132,14,146,24,18,199,298,232,71,359,140,672,97,392,649,5,423,95,21,22,388 }, +{ 15,515,141,217,115,700,13,23,120,317,753,180,33,260,110,137,341,51,1,365,4,77,64,202,0,40,36,352,197,269,10,21 }, +{ 111,134,117,474,23,13,961,12,569,431,37,15,51,115,515,700,277,99,753,38,197,405,457,4,72,94,629,45,11,89,54,148 }, +{ 23,13,51,5,1,15,2,21,12,202,141,0,515,165,120,32,4,64,700,3,115,197,269,125,753,7,9,128,6,180,453,403 }, +{ 13,141,4,23,5,2,115,217,202,51,180,137,269,352,77,1,317,3,21,318,0,15,9,64,10,197,11,341,33,515,752,7 }, +{ 165,125,197,13,391,21,23,558,48,380,97,120,298,33,14,426,66,115,32,386,900,180,6,98,357,237,326,509,51,278,221,457 }, +{ 120,82,15,260,515,1,351,77,450,700,13,21,141,23,753,202,217,93,110,33,51,854,5,128,326,102,137,180,817,48,269,352 }, +{ 23,13,15,51,515,700,961,753,0,457,1,2,4,115,10,453,569,5,33,165,11,719,14,40,64,197,3,21,474,629,38,401 }, +{ 264,166,39,30,9,100,435,254,93,921,190,363,1,625,411,382,897,656,203,478,404,812,438,110,473,88,18,691,156,141,274,272 }, +{ 9,0,252,100,166,39,101,265,364,68,88,329,520,18,419,676,118,167,404,604,16,1,21,30,212,158,553,49,382,274,48,13 }, +{ 15,515,700,753,4,11,141,5,3,13,202,1,180,21,2,165,269,23,40,64,0,318,12,32,128,51,77,117,523,197,120,457 }, +{ 24,1,2,69,35,16,67,18,14,50,0,46,68,9,38,7,133,71,83,149,28,108,189,218,65,114,238,29,75,54,5,96 }, +{ 90,289,214,64,874,13,77,712,66,751,4,23,51,192,32,0,202,194,312,177,33,65,234,104,875,288,59,5,835,416,102,95 }, +{ 0,9,49,127,98,31,301,28,371,159,1,395,512,737,158,761,916,623,16,44,242,39,170,18,293,105,24,272,101,22,23,385 }, +{ 17,62,136,214,123,129,32,292,119,209,710,106,141,162,128,64,45,4,77,249,11,618,211,3,207,130,519,183,38,177,21,269 }, +{ 5,107,581,356,279,32,441,362,493,660,13,298,0,534,49,147,21,22,132,121,97,423,7,590,259,683,14,786,126,508,60,246 }, +{ 
51,13,15,730,453,23,515,719,386,457,12,700,403,475,899,1,6,523,753,421,99,401,165,33,2,19,361,5,0,670,120,27 }, +{ 49,28,9,159,272,22,254,131,158,327,95,105,0,39,35,168,347,286,374,55,65,627,424,912,68,578,1,24,239,175,688,169 }, +{ 15,515,700,33,753,4,77,141,341,317,1,10,13,180,102,22,40,117,115,365,5,901,23,197,134,11,217,351,64,82,21,137 }, +{ 134,15,13,515,23,700,12,753,51,474,37,961,197,10,457,569,4,0,99,2,115,38,165,153,94,3,139,11,1,82,33,5 }, +{ 7,2,20,58,5,14,128,66,6,29,32,43,21,52,16,38,631,61,74,97,46,135,113,25,202,192,13,0,884,45,112,87 }, +{ 77,13,33,202,23,128,102,4,141,342,117,0,269,318,134,22,11,21,32,153,403,291,49,64,137,51,40,15,494,5,196,98 }, +{ 2,1,14,6,46,38,29,65,5,36,67,0,103,7,22,86,133,50,108,208,52,83,24,323,283,69,28,18,10,25,23,75 }, +{ 15,515,700,753,1,5,4,2,3,13,0,11,180,341,12,33,10,197,134,365,77,23,21,901,6,117,165,7,37,32,17,102 }, +{ 203,268,206,93,417,940,31,8,120,137,44,499,959,473,202,692,728,559,0,260,10,326,141,564,817,127,341,1,450,22,110,23 }, +{ 15,82,515,120,700,0,10,753,33,8,64,165,110,31,260,93,13,197,23,22,40,4,351,44,77,9,11,153,102,51,1,196 }, +{ 60,0,16,7,14,43,20,71,28,10,2,22,154,18,13,24,92,1,51,576,35,615,805,925,68,126,124,149,97,64,23,55 }, +{ 19,6,26,80,5,84,27,17,25,2,504,129,45,240,56,123,4,119,618,1,76,106,64,51,14,3,128,65,32,710,0,42 }, +{ 15,515,700,753,13,4,77,23,33,51,0,5,8,10,11,31,44,1,82,22,202,64,110,102,93,21,291,40,141,180,9,49 }, +{ 195,98,271,223,132,167,146,407,1,360,121,834,393,591,212,199,293,259,522,107,354,147,156,191,807,590,48,18,125,16,765,541 }, +{ 128,202,77,210,402,318,33,102,6,40,403,29,342,269,196,757,99,139,2,111,42,4,494,117,275,300,13,12,678,0,177,122 }, +{ 13,33,23,40,51,102,4,117,77,64,134,0,128,153,202,196,453,11,15,12,1,22,403,141,59,14,10,475,515,65,700,95 }, +{ 7,16,14,24,18,2,28,0,92,71,1,22,6,35,60,20,168,10,154,118,5,302,124,69,97,109,703,158,420,12,149,66 }, +{ 15,1,515,23,0,13,700,2,51,753,180,5,120,165,197,21,115,4,33,9,141,7,12,6,3,457,386,202,260,523,8,31 }, 
+{ 60,107,121,132,146,126,199,279,150,92,16,649,441,35,955,7,21,0,423,5,18,195,598,298,493,356,32,653,22,362,953,10 }, +{ 31,44,98,276,284,299,116,935,9,201,0,131,39,127,144,662,1,137,371,492,567,489,93,254,49,268,22,28,30,293,434,737 }, +{ 13,15,23,515,700,0,1,51,753,4,2,10,77,202,5,115,3,165,197,457,9,12,11,961,33,120,22,141,180,7,6,40 }, +{ 123,162,184,257,17,183,229,130,129,3,84,136,99,152,556,383,57,497,12,205,4,62,56,452,80,266,128,14,40,119,27,106 }, +{ 196,33,117,40,153,23,134,13,51,102,453,0,15,475,12,14,515,2,22,700,4,21,753,64,401,670,730,1,9,11,10,99 }, +{ 224,219,187,131,258,385,442,871,836,31,98,908,44,574,127,944,137,839,116,36,613,1,254,39,926,160,829,96,93,371,860,827 }, +{ 121,195,156,132,146,360,590,407,786,522,883,591,259,929,626,941,150,687,5,55,296,379,467,178,586,465,279,21,1,13,60,354 }, +{ 2,1,14,29,6,5,46,52,38,19,114,75,26,65,108,96,25,50,36,70,103,309,17,236,218,74,12,86,0,3,10,112 }, +{ 15,515,82,700,120,753,10,0,8,197,260,165,351,64,13,110,117,93,31,1,9,33,22,23,457,44,450,77,102,898,40,49 }, +{ 7,66,97,2,172,74,226,52,29,135,192,232,43,324,92,5,38,20,222,14,6,568,87,107,353,620,580,16,138,174,448,32 }, +{ 62,129,123,162,136,249,618,183,507,57,4,152,17,59,11,184,117,77,3,128,211,41,130,205,12,40,33,106,64,229,38,313 }, +{ 1,13,15,2,4,515,23,0,3,115,700,5,51,77,341,141,753,180,33,217,197,202,901,6,21,165,11,365,318,317,10,102 }, +{ 6,26,235,19,145,47,112,78,64,27,453,95,29,444,25,624,85,108,648,70,32,130,74,42,711,630,632,138,65,122,113,730 }, +{ 23,51,12,15,13,99,515,153,117,10,700,37,120,82,165,2,753,64,128,0,403,3,5,1,134,197,453,31,202,457,110,21 }, +{ 16,24,18,71,64,35,92,7,246,146,9,108,60,118,199,5,140,2,267,0,230,830,32,133,1,68,50,330,247,563,36,12 }, +{ 15,515,700,753,0,1,13,2,23,3,4,217,51,5,115,8,9,180,341,10,7,6,317,77,33,372,901,197,365,11,120,165 }, +{ 234,639,178,202,77,142,5,455,450,49,416,0,147,427,198,21,315,329,13,318,325,557,120,344,113,259,22,128,61,105,23,494 }, +{ 
1,31,36,44,141,180,55,2,64,22,98,116,13,352,0,115,10,127,5,164,253,498,237,165,341,197,4,86,15,170,125,23 }, +{ 15,120,13,141,23,260,217,515,1,77,51,110,180,700,317,82,269,137,115,202,21,753,64,5,351,291,0,450,352,93,36,326 }, +{ 26,6,112,396,19,145,25,122,648,287,42,74,624,222,416,45,138,66,644,151,113,651,29,573,64,280,445,27,525,85,70,58 }, +{ 156,360,5,146,121,21,271,522,354,132,49,13,18,195,16,340,60,591,446,586,727,0,107,407,167,48,1,463,199,566,32,23 }, +{ 5,61,49,147,178,612,660,120,21,182,23,427,259,683,33,4,77,70,13,3,376,98,64,0,481,344,48,595,291,263,141,51 }, +{ 89,79,468,179,358,205,94,405,115,498,72,180,365,431,37,111,341,734,188,317,482,217,11,4,245,152,413,216,12,474,490,752 }, +{ 24,16,35,68,18,71,7,92,0,108,9,14,118,101,336,175,375,302,28,124,154,55,149,60,398,1,65,2,140,273,345,230 }, +{ 51,730,421,801,453,386,23,523,13,475,719,401,670,365,899,403,115,457,758,165,33,494,450,6,423,805,629,56,569,514,958,388 }, +{ 113,45,6,311,29,2,151,614,145,491,112,80,5,27,61,74,315,66,209,631,19,25,58,17,73,26,1,243,70,64,611,287 }, +{ 4,339,188,471,11,59,79,12,377,94,99,33,77,102,51,111,37,152,13,961,474,542,40,342,3,23,128,403,202,177,184,57 }, +{ 15,4,515,11,700,33,82,40,0,120,753,10,8,110,13,93,23,165,77,260,64,31,22,51,44,102,351,1,125,9,197,21 }, +{ 16,24,18,0,35,68,28,71,124,118,60,7,9,55,14,92,109,101,419,175,22,252,154,375,149,302,158,346,2,49,1,126 }, +{ 17,45,227,21,106,3,2,243,209,5,48,32,221,62,207,50,29,186,290,270,263,52,14,496,400,119,46,255,54,430,38,721 }, +{ 340,354,586,658,156,195,698,668,1,296,9,18,883,363,447,379,303,98,411,13,31,163,51,5,371,48,919,846,121,21,360,70 }, +{ 277,153,111,12,23,51,474,99,38,37,139,117,41,457,79,453,542,13,11,33,134,157,629,188,961,14,196,401,102,569,15,94 }, +{ 0,18,16,159,49,24,9,105,35,68,7,28,22,1,60,344,55,101,109,2,14,158,13,23,71,118,455,286,272,424,5,327 }, +{ 0,105,9,49,16,18,158,28,518,24,101,320,1,68,170,301,272,127,7,286,35,890,109,39,159,98,21,344,31,55,371,23 }, +{ 
141,1,180,15,13,2,365,217,515,352,317,115,341,0,4,5,269,700,23,21,3,752,197,77,753,51,31,901,10,202,8,64 }, +{ 4,23,51,33,19,17,102,153,485,880,40,403,196,26,300,453,27,117,78,0,12,200,47,5,11,14,342,99,53,77,475,2 }, +{ 62,184,56,440,130,229,183,3,556,152,99,162,12,266,17,548,136,57,305,161,123,14,452,4,383,403,257,34,40,84,33,139 }, +{ 13,23,77,141,64,202,33,51,269,115,0,102,21,4,217,128,5,32,318,137,291,9,15,2,180,10,3,317,177,515,7,6 }, +{ 1,22,36,105,170,0,86,2,31,28,239,64,55,5,10,98,9,44,127,95,654,67,301,143,13,12,49,23,320,141,83,21 }, +{ 15,515,700,753,0,1,13,2,23,901,5,8,51,82,9,180,457,4,7,12,3,6,10,120,341,141,22,898,197,351,115,260 }, +{ 1,39,274,98,100,265,190,30,438,310,166,223,88,96,909,31,264,625,530,9,382,812,21,252,593,0,254,539,44,131,23,778 }, +{ 18,212,167,118,363,1,447,411,146,60,271,16,781,121,647,9,621,562,21,478,664,68,815,5,354,98,48,101,24,446,777,463 }, +{ 24,28,22,0,7,1,2,16,14,65,35,49,158,95,109,159,55,105,10,18,124,9,67,5,239,149,12,289,108,68,21,424 }, +{ 105,22,131,272,286,98,55,239,1,31,320,9,127,327,36,185,28,374,86,219,0,64,187,44,578,164,224,913,535,115,601,13 }, +{ 22,31,28,301,127,98,44,0,105,1,512,395,9,293,109,299,95,338,239,125,242,116,36,320,55,841,900,685,599,23,13,763 }, +{ 2,1,58,29,5,14,52,46,186,334,45,155,151,50,400,75,38,69,502,61,48,227,223,7,163,17,262,67,549,21,70,113 }, +{ 7,107,135,232,97,14,2,92,66,16,172,192,278,387,298,356,38,35,448,52,46,43,60,29,20,126,324,526,357,359,64,5 }, +{ 20,43,104,426,173,7,560,414,707,784,319,81,0,861,422,819,38,74,715,52,376,97,879,32,330,22,49,64,66,95,192,526 }, +{ 104,74,636,66,204,0,355,81,222,25,29,319,145,784,20,65,90,4,174,194,7,64,6,746,138,173,750,715,91,43,192,32 }, +{ 0,9,101,35,68,39,65,28,252,124,67,154,364,336,100,166,30,1,289,55,149,346,16,114,158,88,439,24,429,22,570,194 }, +{ 57,14,4,231,236,585,176,59,369,23,361,13,719,51,300,342,12,457,56,3,62,38,202,401,34,46,2,322,11,215,210,507 }, +{ 
1,2,15,3,141,0,515,5,33,700,13,64,77,180,6,128,753,10,4,269,102,202,11,7,134,197,352,120,117,318,12,291 }, +{ 5,1,21,202,13,32,48,23,0,61,259,22,494,120,70,49,51,18,137,128,465,12,178,115,2,453,403,141,58,3,90,450 }, +{ 141,205,4,72,59,79,245,11,352,94,152,76,247,216,21,188,452,217,497,12,89,37,111,339,588,77,64,875,864,115,358,464 }, +{ 15,515,700,753,0,1,2,13,5,4,23,3,8,341,365,51,115,10,120,457,6,141,77,197,31,7,165,9,202,450,961,260 }, +{ 5,2,50,14,58,38,171,46,29,1,45,186,17,52,155,218,48,281,61,487,54,36,67,21,328,334,151,227,760,114,400,133 }, +{ 457,120,70,125,318,64,23,48,795,291,202,761,751,415,77,846,269,758,21,237,96,260,391,165,87,1,128,5,221,13,137,763 }, +{ 13,23,51,33,4,40,117,102,453,64,153,196,0,77,15,11,12,475,1,65,134,10,515,22,21,14,700,59,403,141,2,753 }, +{ 229,152,57,266,452,381,432,12,313,184,99,471,17,4,62,339,157,3,129,59,128,11,369,37,77,38,40,123,5,497,188,257 }, +{ 49,28,109,22,159,9,272,95,105,131,55,35,254,168,39,327,169,0,1,286,175,374,347,158,420,67,36,194,312,424,627,346 }, +{ 5,2,61,29,45,58,80,311,1,17,209,227,52,243,106,869,454,151,592,496,48,334,14,155,6,186,46,171,75,21,255,667 }, +{ 244,44,110,141,260,30,269,352,839,131,574,228,373,276,1,406,219,717,217,137,253,224,120,93,36,31,567,116,661,187,341,88 }, +{ 12,99,79,139,11,453,196,51,277,474,111,23,542,37,94,188,33,13,401,775,40,961,313,102,4,339,153,485,629,134,300,431 }, +{ 16,35,9,0,68,24,149,69,67,18,1,114,65,230,71,7,103,133,50,167,212,118,101,191,140,64,399,28,124,283,55,565 }, +{ 88,30,274,435,131,613,190,100,93,829,166,1,187,795,530,127,382,957,960,160,31,137,466,264,39,800,406,254,28,473,521,219 }, +{ 167,16,18,118,212,24,60,71,101,68,191,9,375,411,363,35,0,1,589,199,302,21,447,55,146,126,92,271,647,121,562,48 }, +{ 64,141,86,177,77,128,147,597,304,95,269,102,275,4,352,49,120,5,372,194,465,13,588,237,947,216,202,180,612,751,107,534 }, +{ 18,65,90,403,523,289,240,214,194,102,701,475,202,217,283,862,389,51,33,0,494,421,453,817,84,64,847,899,352,13,23,437 }, +{ 
13,51,23,202,5,12,21,128,15,115,0,1,141,120,64,32,4,2,515,403,165,457,3,10,700,99,453,318,719,450,308,401 }, +{ 98,223,393,31,1,271,834,791,167,44,202,64,93,697,5,116,77,125,450,446,212,18,541,293,51,120,195,132,284,13,807,765 }, +{ 15,515,700,753,4,11,23,13,40,51,82,165,0,110,93,33,141,64,120,5,10,77,3,102,180,32,202,125,8,197,31,21 }, +{ 15,515,700,753,0,1,13,2,901,23,5,341,3,51,82,8,4,180,961,9,115,10,12,6,898,7,351,141,134,22,31,120 }, +{ 234,416,77,5,315,639,325,202,147,198,113,49,450,61,455,142,0,21,22,342,329,494,178,58,102,427,318,230,13,120,43,470 }, +{ 60,146,16,18,156,126,121,271,199,360,132,24,167,0,640,10,71,522,21,92,5,340,107,354,118,150,22,195,446,35,28,212 }, +{ 4,361,11,14,56,368,377,161,27,12,300,77,59,200,17,554,202,33,40,494,495,21,210,80,757,25,128,23,19,38,444,53 }, +{ 141,82,217,351,15,352,120,1,180,260,515,64,854,36,700,317,752,372,13,269,77,753,922,21,349,23,202,110,93,137,51,373 }, +{ 15,515,700,753,77,13,0,1,23,33,102,2,51,4,3,5,291,217,10,9,450,120,341,7,317,6,11,117,115,8,260,180 }, +{ 15,515,120,13,700,23,77,141,1,260,0,753,180,51,137,202,115,365,110,291,217,5,128,9,21,341,197,269,2,450,317,165 }, +{ 174,6,348,85,138,74,280,204,66,233,192,355,289,65,81,580,636,353,25,91,104,343,673,214,64,95,42,712,792,32,194,90 }, +{ 152,497,452,59,4,216,11,79,94,77,128,188,269,339,588,33,76,529,318,32,141,471,12,202,111,21,5,51,37,90,72,177 }, +{ 417,499,10,141,253,244,110,559,8,564,180,260,728,120,352,638,642,341,951,206,143,752,901,93,137,661,922,373,44,31,811,197 }, +{ 13,77,23,33,4,51,0,102,128,59,141,40,64,115,177,10,137,22,202,2,7,11,90,1,117,180,269,14,49,6,134,3 }, +{ 1,2,22,0,36,5,67,50,14,28,12,86,38,46,83,168,194,65,103,114,49,7,10,95,21,69,23,24,128,51,55,13 }, +{ 17,106,119,207,255,306,742,378,84,62,136,45,3,5,240,80,61,56,209,383,311,790,655,32,2,440,76,151,58,29,179,263 }, +{ 3,128,1,141,2,202,33,5,64,15,0,515,102,13,269,10,700,180,134,51,120,6,77,318,23,137,17,117,753,197,82,153 }, +{ 
514,38,377,328,11,57,41,248,880,266,556,4,152,361,471,757,485,403,305,102,3,211,313,99,457,130,12,14,157,40,23,54 }, +{ 68,0,167,101,9,118,264,520,16,18,21,478,562,1,124,212,100,936,664,777,191,88,806,154,48,24,759,604,35,252,265,65 }, +{ 230,689,699,213,466,352,217,831,30,443,418,144,854,201,840,855,1,251,203,317,530,957,96,93,822,539,36,752,351,137,83,800 }, +{ 33,77,102,117,15,82,13,134,23,64,0,515,120,153,51,4,40,128,700,260,202,141,196,22,753,11,351,10,1,326,95,269 }, +{ 11,40,33,51,117,13,542,328,14,134,38,153,23,12,485,231,102,54,775,37,3,377,111,139,211,4,457,403,369,475,99,719 }, +{ 33,64,77,128,141,2,1,202,102,13,23,117,0,15,3,153,51,134,10,40,6,5,515,269,137,180,318,165,700,7,196,753 }, +{ 15,515,700,753,4,1,5,11,13,21,33,180,93,141,64,2,23,77,82,3,0,102,32,40,352,341,10,197,98,110,117,901 }, +{ 1,2,14,67,50,46,38,24,103,83,0,5,36,28,29,133,114,96,65,52,18,75,54,108,22,7,238,58,160,9,361,69 }, +{ 258,201,276,137,160,860,116,261,295,843,567,144,131,44,187,268,943,219,284,31,202,935,141,98,662,203,127,96,36,93,224,1 }, +{ 7,2,14,16,46,87,75,52,92,278,29,38,140,70,1,5,35,294,24,262,135,69,171,172,58,409,112,60,50,66,97,12 }, +{ 13,23,0,2,51,1,33,4,115,10,15,141,77,3,5,180,217,515,9,7,64,11,700,6,102,40,197,22,317,753,165,202 }, +{ 74,145,6,66,25,204,42,29,222,337,138,26,7,525,192,174,746,287,544,135,415,2,609,632,112,64,87,0,85,45,712,396 }, +{ 77,33,102,15,217,13,23,141,202,515,51,700,291,4,269,753,317,180,21,64,318,115,128,0,275,2,352,196,3,5,137,11 }, +{ 187,219,258,871,44,442,160,574,137,224,908,116,839,131,36,926,276,201,93,228,202,860,31,613,144,531,406,1,902,30,190,318 }, +{ 1,372,141,5,21,77,225,744,96,30,23,349,13,291,269,284,69,442,459,144,303,839,217,622,160,330,260,48,120,410,189,352 }, +{ 66,222,2,74,29,87,135,6,7,145,52,25,294,337,226,172,138,331,42,70,97,112,26,1,632,192,43,5,415,609,461,353 }, +{ 45,17,106,209,5,2,21,29,48,207,3,186,243,155,255,263,454,119,400,496,270,14,290,62,425,1,171,32,659,52,38,56 }, +{ 
93,88,141,120,30,213,260,373,100,717,459,82,110,1,166,450,180,321,217,372,36,269,131,225,22,352,326,466,473,187,244,410 }, +{ 266,57,152,381,313,471,12,229,99,369,339,62,157,3,4,37,77,38,188,17,11,162,40,184,129,59,475,775,128,452,403,453 }, +{ 217,352,317,141,752,15,180,515,372,365,700,341,753,349,77,21,291,1,115,244,64,120,13,98,269,82,5,498,864,351,23,144 }, +{ 14,514,369,102,403,377,51,719,880,153,23,13,457,11,485,4,401,12,328,453,33,40,117,57,629,38,730,236,134,670,361,961 }, +{ 107,7,172,14,92,135,2,359,60,314,46,16,126,278,232,150,279,32,38,392,298,5,35,97,24,192,259,288,330,52,356,312 }, +{ 0,4,25,13,59,90,65,23,26,19,18,12,5,216,91,51,389,33,77,11,22,85,27,81,21,177,746,45,42,194,37,123 }, +{ 5,49,315,202,416,77,455,639,450,21,197,137,350,13,408,0,329,318,494,344,61,402,64,509,347,120,113,48,95,713,308,401 }, +{ 130,47,381,390,59,90,200,214,289,6,65,472,29,64,874,648,50,751,624,26,52,32,4,194,875,714,85,249,247,33,881,19 }, +{ 51,23,453,13,719,12,457,165,37,730,99,4,386,197,401,17,11,2,3,15,5,961,475,6,515,64,54,700,32,115,0,403 }, +{ 15,515,1,13,700,2,23,0,753,5,3,180,51,4,165,12,141,21,197,457,7,115,6,9,352,10,120,202,8,341,11,77 }, +{ 0,9,1,67,35,28,68,16,24,65,18,69,50,114,103,12,22,13,5,101,2,96,23,83,149,21,39,55,7,175,433,124 }, +{ 28,105,22,0,1,320,170,9,49,301,109,95,127,31,98,55,65,35,2,24,168,159,36,713,16,740,13,338,21,44,512,23 }, +{ 13,77,4,51,23,33,102,202,128,59,40,0,64,141,117,403,115,11,15,318,153,269,22,515,475,134,10,494,177,1,90,210 }, +{ 13,23,0,51,77,33,2,141,4,10,1,64,115,102,3,6,22,15,217,11,180,7,40,515,165,202,177,9,269,128,700,5 }, +{ 456,116,492,8,949,268,867,391,203,51,499,13,719,386,31,791,457,918,125,10,23,93,479,685,417,0,22,338,506,551,870,730 }, +{ 17,237,45,180,106,62,32,64,115,41,136,498,255,21,197,129,241,13,3,227,23,352,165,752,350,365,449,155,4,546,476,38 }, +{ 1,15,180,515,0,2,341,700,901,352,4,141,13,3,752,5,753,217,317,115,365,23,197,21,51,165,31,6,269,202,77,7 }, +{ 
205,141,216,269,497,4,588,76,59,152,128,452,79,77,875,11,72,94,188,217,352,12,247,37,90,64,32,1,474,23,947,372 }, +{ 64,247,217,237,317,180,752,115,349,141,498,13,437,304,23,372,352,164,579,291,33,864,177,197,0,490,72,10,482,77,269,51 }, +{ 2,1,0,13,15,141,3,77,5,515,64,33,23,180,6,700,4,117,217,7,10,11,102,165,753,197,115,134,40,352,12,269 }, +{ 11,40,38,328,33,542,12,313,41,339,23,157,377,117,369,51,471,99,775,485,13,305,457,57,14,475,37,248,4,54,188,719 }, +{ 33,77,102,40,13,23,0,51,4,128,64,202,117,141,22,196,153,10,134,15,59,269,1,137,65,11,403,318,453,86,515,177 }, +{ 472,80,34,250,495,161,17,14,469,176,128,4,389,106,283,436,216,527,3,297,483,177,53,56,231,194,119,84,719,57,255,59 }, +{ 317,352,180,141,217,752,115,341,365,244,1,269,202,901,253,15,21,498,372,4,137,515,13,2,700,318,5,197,23,143,753,349 }, +{ 9,39,101,18,265,100,333,520,252,16,0,329,593,1,553,364,68,167,310,30,121,254,118,158,363,166,60,604,272,24,286,404 }, +{ 15,515,1,180,700,901,0,2,753,341,752,4,3,13,115,365,317,5,23,197,141,217,165,352,6,22,36,9,137,51,7,10 }, +{ 131,39,9,829,166,613,578,827,1,30,716,254,100,98,31,224,0,406,228,310,616,219,44,846,127,190,938,96,265,371,856,438 }, +{ 17,64,62,106,141,751,136,292,32,129,352,41,38,476,86,128,214,237,5,177,123,209,217,45,269,954,162,710,180,3,90,4 }, +{ 25,42,235,65,650,736,605,6,630,85,123,343,233,256,26,122,63,389,141,249,416,444,368,194,19,108,138,174,90,0,544,511 }, +{ 184,229,152,57,266,432,497,452,17,381,619,257,313,12,4,205,59,3,99,471,157,128,5,129,339,369,77,11,32,45,202,2 }, +{ 137,202,160,860,141,30,93,567,36,276,295,261,131,39,9,964,201,843,1,98,800,318,116,22,943,187,10,219,206,44,269,535 }, +{ 0,493,125,64,49,9,279,10,35,18,93,55,293,31,14,13,194,165,325,48,22,132,21,107,98,389,44,581,342,259,174,137 }, +{ 15,515,700,753,4,33,13,77,23,5,51,32,102,40,93,11,349,141,21,8,82,202,64,31,110,10,117,0,1,44,3,318 }, +{ 
110,253,854,811,352,141,244,951,180,642,661,384,498,143,752,317,911,10,269,206,559,351,261,120,902,533,922,959,365,160,332,217 }, +{ 2,29,70,1,75,52,6,220,26,112,145,331,74,163,19,69,38,324,46,58,14,5,25,21,278,223,50,307,66,7,67,409 }, +{ 13,23,77,33,51,4,64,141,115,102,0,2,128,177,40,11,202,10,6,180,7,15,269,1,32,217,59,22,291,3,137,515 }, +{ 340,897,691,478,658,264,914,382,100,812,363,1,724,156,166,698,88,521,39,404,682,447,296,96,303,411,30,909,9,274,656,772 }, +{ 9,18,310,101,265,159,326,120,105,158,33,363,77,195,51,55,13,39,354,132,23,7,28,639,16,137,98,1,252,272,709,49 }, +{ 57,313,471,12,99,369,157,339,266,152,38,37,475,453,328,775,11,40,59,188,77,514,401,403,342,4,139,33,377,51,229,14 }, +{ 16,7,24,14,35,140,60,92,18,69,71,2,189,1,46,230,108,388,150,38,21,172,278,67,246,267,50,309,236,135,451,0 }, +{ 206,417,93,940,959,473,499,203,8,137,559,728,31,202,44,120,450,141,10,260,116,564,22,326,269,318,268,244,0,1,253,638 }, +{ 15,515,700,753,1,0,13,2,23,4,3,51,5,217,7,77,341,115,8,9,10,33,6,180,317,349,291,120,11,165,457,901 }, +{ 1,2,5,14,48,21,290,32,50,45,38,46,263,207,155,72,76,29,17,408,425,171,89,52,7,0,292,449,3,227,513,428 }, +{ 121,132,354,167,271,223,146,98,18,463,1,668,446,195,407,60,212,447,781,48,360,363,411,522,156,393,807,9,21,16,293,13 }, +{ 131,578,105,371,219,224,716,616,187,49,9,254,737,159,385,98,258,127,272,761,0,916,623,910,28,286,39,31,22,518,924,242 }, +{ 302,467,97,6,273,1,24,484,124,51,36,18,2,398,453,421,523,69,7,23,13,403,386,150,66,0,298,65,426,165,22,158 }, +{ 30,190,530,88,1,100,778,539,625,274,382,410,96,731,960,39,795,321,9,131,264,144,840,748,44,166,669,957,36,31,435,228 }, +{ 141,1,2,128,64,33,15,202,3,0,180,5,13,77,515,134,269,102,197,700,10,137,318,6,120,165,753,352,4,82,23,117 }, +{ 44,201,567,116,131,224,295,662,489,268,219,31,434,144,187,276,110,384,93,261,699,137,36,442,120,1,613,30,228,64,141,244 }, +{ 12,15,51,23,515,37,99,13,700,0,10,117,753,38,165,82,134,120,11,453,197,64,115,569,1,629,401,22,457,474,110,153 }, +{ 
7,135,2,92,172,14,66,140,38,52,97,46,29,74,16,324,278,226,6,87,1,571,262,5,357,232,35,380,69,314,24,330 }, +{ 125,386,23,963,949,60,51,391,165,221,13,197,118,21,719,193,541,421,517,150,393,7,401,453,308,5,791,551,326,558,48,173 }, +{ 6,85,42,25,138,222,174,235,280,256,525,289,26,214,64,746,90,32,544,65,204,19,66,337,355,95,348,415,74,29,5,312 }, +{ 1,14,5,50,2,67,24,0,46,69,48,21,58,103,16,12,18,38,54,96,83,7,502,45,36,181,35,9,430,28,10,155 }, +{ 811,351,642,180,951,752,110,638,253,10,82,352,197,341,365,564,499,854,873,55,9,417,282,901,244,22,559,143,206,141,28,898 }, +{ 23,13,51,15,12,453,403,165,4,515,115,719,475,457,700,523,2,21,0,99,202,197,14,5,386,753,128,401,37,308,33,117 }, +{ 120,13,23,77,141,1,15,93,217,82,260,51,137,202,110,515,21,180,165,5,128,102,64,351,291,700,269,352,326,203,177,0 }, +{ 1,5,0,22,12,2,36,21,10,23,86,13,28,51,9,128,48,14,32,50,7,3,96,137,54,4,202,49,37,65,208,323 }, +{ 219,98,23,127,301,51,258,308,170,910,13,165,22,105,293,616,125,242,276,401,201,395,964,115,55,284,31,374,327,206,512,900 }, +{ 64,180,80,165,5,237,2,250,34,58,297,61,197,17,22,29,186,498,231,445,247,3,752,311,95,32,483,153,27,45,115,469 }, +{ 13,77,23,33,0,2,1,64,141,51,102,10,15,3,115,40,180,6,515,128,7,22,269,202,4,217,700,5,177,117,14,165 }, +{ 15,120,51,515,13,450,23,700,202,153,196,753,260,64,128,141,730,4,326,386,21,523,33,318,5,457,95,32,403,1,77,269 }, +{ 2,1,5,29,32,45,207,263,14,425,58,72,76,21,7,408,48,46,52,186,17,292,38,6,61,89,476,50,155,720,119,3 }, +{ 15,515,700,753,4,13,11,5,1,23,33,21,3,141,32,2,40,180,117,64,269,202,102,197,0,165,120,51,341,352,153,12 }, +{ 76,5,214,129,2,123,45,710,17,249,618,460,179,32,1,257,205,519,90,207,245,184,162,61,769,209,292,106,6,29,14,128 }, +{ 1,15,23,13,120,141,51,515,202,21,700,165,0,180,137,2,5,77,128,93,753,260,269,197,326,33,110,352,82,102,318,48 }, +{ 7,2,135,14,29,87,66,52,97,172,70,112,5,58,46,337,92,16,20,43,1,38,232,155,74,294,6,461,409,151,262,32 }, +{ 
574,187,384,926,860,110,258,434,269,531,141,244,160,261,253,116,699,959,940,717,533,36,219,31,902,661,871,295,201,352,10,260 }, +{ 156,354,296,1,182,586,64,379,340,937,850,698,31,48,98,44,120,18,163,23,30,658,195,125,77,284,223,291,774,481,96,39 }, +{ 250,80,34,472,17,495,176,469,33,194,64,483,4,297,141,14,161,27,53,667,56,833,73,527,585,231,106,51,84,814,2,59 }, +{ 97,7,81,140,66,92,172,192,24,298,43,6,74,69,314,426,462,14,501,16,21,508,60,189,267,232,230,104,48,20,135,330 }, +{ 31,44,116,144,268,393,492,434,367,489,127,98,918,0,384,9,22,206,948,105,93,203,1,456,332,940,299,28,137,49,293,125 }, +{ 15,128,33,3,13,51,141,1,202,64,23,2,515,120,102,0,5,82,10,700,165,197,269,153,403,110,753,137,196,318,117,12 }, +{ 31,98,127,9,0,105,22,28,44,512,293,395,299,1,242,49,685,763,320,599,125,116,109,276,284,95,870,159,23,456,36,900 }, +{ 7,24,124,1,6,97,2,69,14,18,23,92,21,67,66,16,5,484,43,20,118,65,36,22,28,0,51,140,13,71,29,150 }, +{ 1,64,442,303,284,349,202,141,622,67,154,447,260,44,652,429,9,335,237,919,197,98,167,33,682,269,547,77,863,411,340,201 }, +{ 1,15,2,141,515,0,700,13,3,180,10,753,5,64,77,33,4,6,7,197,102,269,165,23,134,11,352,341,291,349,22,120 }, +{ 99,139,12,453,196,277,775,40,475,33,23,401,215,51,11,14,77,111,313,130,38,211,37,266,129,15,339,153,719,3,369,515 }, +{ 33,77,102,4,23,128,13,141,202,64,51,0,40,59,269,115,117,137,153,1,318,11,10,177,15,134,22,90,196,2,403,32 }, +{ 7,2,14,58,70,112,16,5,87,38,46,52,6,128,135,1,32,21,155,29,66,64,0,97,92,186,172,294,13,23,20,37 }, +{ 15,13,515,1,700,2,23,0,753,5,3,4,51,10,341,115,365,180,11,33,317,77,6,7,217,12,197,165,117,9,64,102 }, +{ 2,1,14,29,75,69,67,6,52,46,38,24,103,220,83,25,70,87,262,74,96,267,50,366,26,16,226,394,357,66,108,19 }, +{ 9,105,18,39,1,0,16,557,101,272,252,890,326,49,265,21,137,100,23,938,13,310,159,5,31,24,254,51,30,128,202,132 }, +{ 80,209,45,61,667,17,6,106,5,2,151,29,483,255,454,833,27,311,112,19,738,378,1,58,113,26,25,469,119,887,32,64 }, +{ 
13,23,51,15,5,1,515,0,21,2,12,141,700,165,202,115,753,32,180,4,3,197,10,120,457,9,269,128,64,341,7,33 }, +{ 99,12,453,277,139,157,369,474,339,51,38,23,37,196,188,401,775,111,11,313,328,475,153,266,4,471,79,40,33,629,102,14 }, +{ 7,92,16,232,97,140,126,14,60,107,66,35,298,387,314,104,246,462,441,150,0,38,24,2,172,357,230,330,5,633,22,289 }, +{ 13,77,23,202,318,141,33,4,51,269,102,177,115,403,137,2,40,494,90,11,342,128,31,117,21,32,7,12,64,134,14,10 }, +{ 13,2,0,23,141,1,77,3,180,33,6,64,15,10,115,51,4,5,217,197,7,165,515,102,22,11,700,269,40,352,177,14 }, +{ 15,515,700,753,4,11,1,93,13,5,180,110,82,21,120,23,2,33,10,141,3,165,197,102,901,0,32,341,117,40,153,12 }, +{ 15,515,700,753,1,13,0,2,23,4,77,51,3,5,341,291,7,33,6,115,10,9,8,217,11,177,120,180,102,165,197,365 }, +{ 20,43,198,325,173,904,104,234,66,147,77,319,416,422,97,426,5,0,7,450,861,202,712,725,2,32,639,376,38,324,945,315 }, +{ 105,0,9,28,49,301,170,1,127,159,22,16,31,98,512,623,24,109,158,395,35,68,371,65,713,55,2,242,293,21,44,18 }, +{ 213,88,689,466,230,30,321,435,699,352,217,201,795,831,144,854,1,443,96,539,530,840,418,251,855,190,93,100,669,31,957,662 }, +{ 130,453,47,196,4,57,14,59,236,711,51,153,730,77,412,381,23,202,108,128,361,13,283,117,11,719,200,46,34,78,210,2 }, +{ 1,2,5,14,0,50,36,22,38,46,65,67,12,86,114,28,103,29,208,7,10,128,21,83,218,23,96,54,194,6,133,51 }, +{ 6,26,74,19,165,453,14,730,1,125,197,50,29,51,138,357,13,2,108,391,70,719,46,457,47,500,386,262,112,23,235,52 }, +{ 9,10,376,20,43,0,49,18,30,120,2,33,325,104,501,470,77,788,725,102,523,39,858,5,904,414,174,55,137,37,342,13 }, +{ 15,515,700,753,0,1,13,23,51,77,120,202,341,82,5,4,9,260,2,137,141,128,115,351,901,8,180,10,197,21,450,33 }, +{ 105,131,272,578,9,49,371,219,159,616,286,320,224,187,716,98,28,22,0,623,127,258,910,737,385,31,239,347,254,109,424,95 }, +{ 457,51,13,23,961,12,719,99,453,15,4,515,165,401,629,3,700,11,17,14,2,37,753,41,57,569,38,45,0,33,5,32 }, +{ 
202,120,5,33,318,77,450,102,1,260,403,128,494,21,165,13,269,12,326,23,342,523,402,2,817,64,15,141,125,82,457,475 }, +{ 141,269,352,217,180,64,349,137,202,160,317,15,372,515,700,752,318,753,244,13,437,291,165,864,22,237,5,82,954,21,77,418 }, +{ 70,29,2,145,74,112,26,6,75,52,19,66,632,1,87,220,5,135,163,287,307,25,226,7,58,396,294,278,113,409,69,151 }, +{ 82,351,317,15,752,180,898,352,141,901,515,341,10,700,365,1,753,498,0,217,253,115,55,854,33,5,143,32,21,160,36,197 }, +{ 39,9,310,254,0,30,101,49,252,272,100,265,105,455,159,557,190,333,286,688,18,166,1,158,709,16,625,627,31,131,327,329 }, +{ 2,58,29,5,1,151,186,52,70,45,7,549,14,75,112,400,113,155,61,46,227,163,311,315,66,6,307,27,17,220,287,74 }, +{ 141,217,13,21,352,23,269,77,180,115,317,64,202,15,349,137,5,51,165,291,318,752,372,4,0,102,33,365,197,32,341,125 }, +{ 68,35,0,9,65,101,149,124,24,154,175,16,28,7,67,1,18,189,114,398,55,14,345,39,118,133,69,2,230,429,71,283 }, +{ 66,7,29,2,112,52,20,43,97,151,74,192,135,5,173,525,337,45,145,58,415,25,14,32,644,70,544,226,222,21,6,580 }, +{ 31,125,44,22,116,299,242,55,1,170,64,36,479,870,456,685,10,599,558,0,268,506,28,740,23,903,492,164,393,206,2,86 }, +{ 188,11,79,12,99,377,94,33,542,339,40,474,111,37,4,51,102,453,139,775,13,475,23,961,277,471,134,57,431,266,115,117 }, +{ 658,698,340,98,296,303,1,31,850,363,156,919,44,774,586,385,120,77,82,10,223,30,354,291,23,914,478,87,260,163,48,13 }, +{ 15,515,700,753,82,4,1,13,901,33,197,11,5,10,23,165,2,0,180,3,21,77,51,120,365,115,217,40,117,102,32,401 }, +{ 15,515,700,753,4,11,5,13,1,141,3,180,23,202,21,2,269,64,165,33,40,32,0,318,120,128,12,197,117,352,51,17 }, +{ 91,6,233,85,370,718,81,65,25,256,63,343,42,74,235,123,138,511,397,249,26,194,650,355,64,87,544,18,90,643,66,214 }, +{ 23,13,202,51,21,120,1,5,141,128,450,64,318,403,15,137,260,33,12,48,32,31,125,494,269,102,165,515,77,2,197,14 }, +{ 180,317,365,341,752,217,115,352,901,482,372,498,1,141,15,253,515,244,2,700,0,21,13,82,23,4,579,351,753,291,269,77 }, +{ 
13,115,197,341,9,352,468,237,64,498,23,165,22,509,901,546,482,180,28,569,317,51,365,873,391,95,86,217,49,837,752,706 }, +{ 13,23,51,1,141,5,165,202,21,120,64,125,180,15,2,33,197,115,128,32,260,269,12,82,4,515,137,7,318,93,0,700 }, +{ 214,289,90,174,874,6,138,280,65,81,64,85,355,751,194,233,312,348,835,91,0,32,343,636,249,29,875,288,519,104,247,74 }, +{ 15,515,700,753,4,5,11,13,1,33,23,21,2,3,102,32,141,77,180,117,31,64,0,40,134,196,120,352,12,44,197,6 }, +{ 33,15,13,515,117,23,700,217,134,753,0,51,153,77,141,2,4,64,196,1,3,180,10,115,5,102,6,11,22,202,165,7 }, +{ 15,515,700,753,33,4,77,102,1,40,13,117,11,115,134,5,21,153,23,217,3,32,2,317,120,196,180,141,51,12,59,260 }, +{ 15,515,700,753,13,0,1,23,2,217,51,3,4,5,8,317,115,9,341,10,202,180,6,365,7,82,457,22,120,901,33,291 }, +{ 7,2,135,20,97,14,66,52,337,673,192,29,43,355,353,5,16,294,107,376,147,226,331,560,64,470,222,104,415,32,4,324 }, +{ 195,132,142,167,146,77,363,271,121,354,202,120,647,178,786,212,687,0,101,878,16,522,60,5,450,411,35,55,98,639,259,318 }, +{ 202,77,20,0,318,66,104,128,102,269,177,43,33,7,216,291,494,5,2,342,74,173,97,112,450,22,337,10,234,52,64,678 }, +{ 107,362,612,356,359,97,414,43,259,20,392,7,298,147,819,683,465,173,729,660,319,14,5,779,581,595,246,35,501,92,0,230 }, +{ 6,165,14,453,13,51,19,23,386,457,74,391,308,2,26,401,47,758,603,108,719,366,1,29,309,730,324,197,133,70,115,867 }, +{ 179,72,205,180,247,245,4,490,352,59,317,152,79,498,94,217,148,76,752,864,11,216,141,405,89,452,197,111,497,188,37,21 }, +{ 107,7,298,314,14,359,32,392,232,279,172,97,60,581,387,126,121,0,534,493,356,92,441,95,13,21,35,147,22,5,16,362 }, +{ 156,271,354,586,360,132,591,195,121,18,340,1,5,13,21,48,668,446,23,463,296,658,60,55,407,698,146,70,626,51,163,24 }, +{ 13,23,51,4,0,12,457,15,11,453,2,515,5,1,99,10,115,165,700,475,401,403,3,961,40,14,37,753,719,32,64,569 }, +{ 48,125,21,165,13,221,23,763,423,508,197,5,98,92,193,16,441,386,64,314,293,457,391,140,49,60,102,693,683,51,35,867 }, +{ 
202,77,120,450,5,318,1,494,0,195,18,132,523,403,326,604,354,260,121,576,203,167,234,817,682,49,35,615,21,20,13,102 }, +{ 39,9,166,30,0,101,158,68,404,190,333,274,252,310,88,100,49,28,344,35,21,22,419,131,438,1,16,65,530,694,124,10 }, +{ 15,515,700,753,110,4,1,11,165,180,93,13,82,5,2,197,33,120,0,3,10,23,21,115,901,217,341,77,317,51,32,117 }, +{ 2,29,1,14,6,52,5,46,50,26,70,19,103,58,38,67,96,262,516,309,218,133,108,27,75,17,112,114,24,487,331,83 }, +{ 120,77,15,13,1,141,260,23,515,217,110,51,137,700,317,202,165,291,180,21,753,128,0,177,326,93,450,82,64,269,197,5 }, +{ 255,59,554,297,183,56,33,444,108,358,123,196,269,122,77,153,57,177,117,730,19,467,605,130,128,50,275,4,291,475,134,133 }, +{ 13,23,51,12,153,14,117,120,165,134,99,401,38,453,15,128,197,719,64,515,475,403,37,33,196,700,40,125,5,0,54,2 }, +{ 64,33,174,348,95,108,467,554,56,0,25,306,233,6,63,511,343,120,13,85,29,561,543,707,319,180,899,355,77,49,256,18 }, +{ 120,260,51,23,77,15,202,1,93,82,141,450,13,326,515,137,21,5,64,33,110,700,128,165,318,203,269,102,351,753,197,125 }, +{ 15,515,700,753,4,13,11,1,5,21,23,2,33,64,3,180,32,141,22,102,77,0,10,93,82,352,117,40,341,31,165,6 }, +{ 15,515,700,753,341,13,23,141,33,1,0,217,4,77,180,10,82,351,51,137,5,64,9,317,21,11,102,40,260,202,854,115 }, +{ 105,272,131,22,327,286,28,239,320,9,109,578,219,49,98,224,95,159,538,371,616,127,187,64,713,55,0,170,168,258,716,623 }, +{ 16,18,68,35,24,60,71,118,92,126,0,9,101,191,7,55,154,175,212,14,167,150,302,28,375,1,107,124,346,273,21,108 }, +{ 20,147,43,470,376,142,904,178,427,798,0,595,198,325,858,319,61,202,173,97,5,422,14,22,107,259,32,49,887,77,414,392 }, +{ 13,23,51,12,33,15,99,64,128,515,453,202,117,153,37,102,700,40,134,196,120,0,2,753,141,14,38,3,82,403,77,21 }, +{ 383,17,62,136,84,119,56,440,3,504,240,80,378,129,123,548,106,128,4,11,14,555,162,32,184,361,59,64,205,5,469,57 }, +{ 70,1,48,652,5,638,846,888,21,349,269,260,340,562,767,761,163,883,774,141,125,518,591,0,23,9,87,13,371,303,622,31 }, +{ 
66,135,6,97,74,278,69,7,14,324,267,172,2,140,462,1,357,38,808,550,92,841,189,29,16,25,298,87,75,204,24,335 }, +{ 51,23,33,13,102,40,12,128,64,77,10,202,0,196,117,4,14,99,134,453,65,153,11,475,139,403,22,141,86,2,21,15 }, +{ 88,100,264,166,274,435,772,1,382,921,96,478,30,438,639,909,897,521,190,466,960,410,9,144,530,418,31,329,265,691,778,93 }, +{ 62,440,136,56,84,3,504,548,555,383,4,17,129,128,507,361,123,59,119,162,14,57,152,328,161,11,202,495,184,27,80,215 }, +{ 911,617,332,959,206,141,253,244,282,384,110,120,10,260,352,143,951,811,269,373,160,417,93,531,728,203,434,940,137,55,36,717 }, +{ 120,15,260,141,77,1,515,82,700,351,33,23,450,13,110,326,64,217,269,753,203,137,102,5,165,21,51,291,93,177,373,128 }, +{ 15,515,700,753,0,1,2,23,13,51,5,9,82,901,180,8,3,4,120,6,7,141,93,12,197,341,10,33,115,730,64,125 }, +{ 7,104,97,107,356,232,66,560,298,289,14,707,38,568,359,64,20,0,65,324,22,214,92,32,192,5,387,43,712,90,172,95 }, +{ 6,1,2,66,67,14,74,24,108,29,69,83,458,7,25,38,135,103,36,150,451,114,52,594,75,65,380,18,267,602,19,278 }, +{ 13,23,51,12,115,21,202,5,457,15,4,1,64,719,0,403,2,3,453,165,99,141,401,128,32,515,10,37,523,197,120,700 }, +{ 57,59,4,11,412,381,77,53,421,291,250,368,99,14,27,369,803,283,23,108,403,19,339,210,0,401,12,444,236,40,361,736 }, +{ 15,515,700,1,0,753,2,13,23,5,51,180,3,115,6,7,457,4,9,8,12,82,197,165,141,901,120,719,33,64,21,22 }, +{ 64,95,180,247,929,146,90,126,197,32,237,60,288,165,316,92,5,13,77,7,217,955,522,22,16,314,132,4,317,10,312,86 }, +{ 15,1,120,13,23,515,0,51,700,180,141,2,5,202,21,260,753,165,137,33,77,110,197,128,326,7,450,4,102,9,269,12 }, +{ 14,2,16,46,1,7,24,69,75,35,38,50,29,220,52,140,267,67,18,54,70,309,5,60,92,189,171,87,71,163,58,0 }, +{ 31,98,127,44,9,299,0,276,293,284,116,49,935,599,105,22,456,201,28,1,39,125,242,137,371,144,131,492,159,272,51,395 }, +{ 6,27,151,53,573,445,297,113,26,73,436,19,491,250,396,315,45,112,145,58,614,881,25,34,611,200,17,80,70,5,138,631 }, +{ 
32,693,81,788,90,804,403,56,494,21,84,397,202,65,18,77,64,681,214,725,523,784,526,33,102,825,240,0,115,241,817,91 }, +{ 24,7,14,2,18,16,65,0,108,149,28,69,1,71,154,36,124,35,67,140,189,429,92,68,66,22,55,118,302,150,9,6 }, +{ 0,68,9,35,65,101,189,212,114,67,124,69,1,154,149,39,230,64,252,16,88,702,103,100,18,336,28,329,520,83,30,755 }, +{ 5,2,186,29,61,45,17,1,52,48,58,171,155,227,80,209,311,21,14,46,50,106,243,513,334,502,496,38,3,6,32,592 }, +{ 15,515,700,753,13,1,2,0,3,4,5,23,341,11,10,33,6,51,165,117,153,7,180,12,365,901,77,569,197,115,64,9 }, +{ 13,15,23,515,0,51,1,700,4,2,753,10,3,5,12,77,33,961,165,457,197,11,115,9,22,102,40,403,202,21,14,59 }, +{ 15,515,700,753,13,0,1,23,2,33,102,5,4,10,9,3,51,115,77,7,6,341,12,11,217,40,457,196,180,165,8,523 }, +{ 166,39,30,274,190,100,333,438,530,310,88,252,0,9,539,265,1,656,404,101,625,131,778,254,31,455,676,329,724,158,21,23 }, +{ 734,148,94,308,431,115,37,89,111,413,79,468,197,629,341,474,569,12,13,873,179,401,11,4,180,23,205,72,59,365,134,51 }, +{ 539,228,224,219,816,190,30,258,871,840,669,93,406,530,957,187,160,531,748,137,131,88,863,36,728,839,44,213,352,116,202,466 }, +{ 393,791,125,801,730,551,386,23,31,175,93,98,51,13,144,788,126,203,21,345,116,22,949,110,575,165,326,44,0,4,60,221 }, +{ 13,23,77,141,0,4,51,2,33,115,64,1,10,3,6,15,11,102,7,217,180,40,515,22,128,177,202,9,700,269,165,5 }, +{ 2,29,7,70,52,14,1,58,112,46,75,5,171,163,87,220,307,151,186,334,38,66,155,16,69,135,278,45,262,97,6,21 }, +{ 88,321,213,100,230,435,689,466,1,382,30,352,217,699,410,96,795,36,921,752,190,141,144,180,44,831,317,83,443,31,840,251 }, +{ 363,411,101,520,354,9,195,668,132,156,447,1,905,364,18,23,765,664,146,5,360,13,121,96,98,31,252,39,100,759,264,551 }, +{ 13,23,51,730,12,719,453,457,401,475,5,21,403,2,0,1,15,4,3,899,99,32,165,11,515,308,197,115,6,961,700,523 }, +{ 72,76,89,12,37,4,308,179,38,528,90,431,54,205,148,184,401,57,152,474,23,59,51,245,428,11,32,99,405,316,257,21 }, +{ 
376,20,43,147,470,173,97,595,107,319,414,142,819,5,729,178,858,7,427,32,426,104,14,0,392,362,259,61,230,77,560,246 }, +{ 202,141,269,494,318,137,51,128,403,4,217,96,77,5,64,177,291,180,15,352,102,10,33,349,2,317,0,341,120,515,21,453 }, +{ 77,202,33,128,102,318,494,269,13,0,117,23,342,291,403,15,134,51,153,141,177,515,82,137,196,700,203,64,22,351,753,4 }, +{ 253,110,951,352,499,811,10,854,180,638,244,559,642,752,564,8,141,143,417,341,901,260,206,197,922,661,93,15,498,373,165,911 }, +{ 141,13,23,180,4,217,5,1,269,317,21,0,2,202,115,51,352,77,3,197,64,341,318,15,291,9,137,93,32,165,515,33 }, +{ 9,0,18,252,16,101,68,39,24,118,35,109,158,329,28,167,60,364,333,265,49,100,22,419,553,55,1,677,71,7,212,159 }, +{ 28,109,9,39,0,158,49,22,168,35,55,175,1,65,67,185,194,159,289,95,272,114,30,105,86,584,36,169,254,2,83,24 }, +{ 15,515,13,700,1,753,2,23,0,3,4,5,33,341,11,51,6,10,197,115,901,180,77,40,102,12,365,165,141,217,7,317 }, +{ 173,693,104,422,5,18,61,32,102,0,20,13,784,560,33,66,397,526,49,207,29,25,510,707,65,6,11,344,21,263,81,77 }, +{ 23,13,386,51,308,801,719,221,401,949,21,730,165,421,102,115,125,33,341,670,468,117,770,1,120,6,197,14,403,97,67,958 }, +{ 0,49,105,16,28,24,159,9,158,320,1,68,35,239,170,18,109,7,55,65,2,95,301,124,347,14,21,154,22,127,286,31 }, +{ 2,5,1,207,45,29,32,58,76,61,6,263,292,655,72,14,17,476,7,119,52,306,70,64,21,90,186,214,106,38,3,790 }, +{ 21,6,125,49,13,64,715,66,115,95,197,33,22,32,204,165,56,278,0,408,241,120,4,808,681,350,263,85,81,571,135,509 }, +{ 612,427,325,107,202,5,376,49,64,392,403,470,21,147,31,788,494,14,362,465,858,98,20,804,518,43,845,318,125,97,725,534 }, +{ 32,21,76,72,2,1,14,5,241,449,89,38,350,221,155,48,50,292,37,46,45,90,270,54,17,179,214,12,148,430,476,413 }, +{ 24,0,28,16,7,124,35,154,14,149,65,18,9,68,55,108,175,71,2,1,22,109,92,67,484,336,118,69,302,398,570,420 }, +{ 1,5,14,2,48,50,38,67,46,21,0,54,45,270,281,12,24,32,155,96,513,103,290,83,61,58,36,17,37,72,69,181 }, +{ 
13,961,569,197,37,15,23,474,515,94,148,111,12,165,629,341,700,79,901,401,51,405,753,10,134,4,115,734,873,11,89,117 }, +{ 33,23,102,51,13,40,77,128,64,202,141,15,4,12,0,1,2,117,22,11,10,403,153,515,99,318,137,269,139,196,700,134 }, +{ 0,1,24,67,9,16,18,35,28,69,103,50,5,2,65,12,83,68,7,96,14,22,21,149,75,114,13,133,23,71,218,54 }, +{ 384,617,940,332,855,911,206,959,434,282,141,10,93,253,244,110,144,268,120,36,352,137,417,203,116,31,44,269,160,201,143,951 }, +{ 30,93,473,137,31,704,450,652,190,203,800,254,166,274,326,144,269,160,127,303,120,625,88,848,110,435,77,521,349,131,340,744 }, +{ 53,27,73,26,19,250,297,200,25,630,17,6,611,122,34,42,714,235,472,65,436,14,80,684,690,106,45,113,680,108,64,4 }, +{ 15,515,1,2,700,0,753,3,5,141,180,4,13,77,33,10,217,6,7,134,11,352,197,64,165,341,317,23,12,115,102,40 }, +{ 254,530,39,613,688,221,30,31,438,190,228,960,1,44,141,21,180,406,23,166,9,202,13,96,137,48,131,829,317,269,393,51 }, +{ 9,39,28,35,30,166,158,36,0,175,101,346,364,67,49,68,168,420,88,1,194,131,100,352,55,83,190,64,137,570,86,65 }, +{ 62,56,3,548,555,507,440,161,34,4,215,136,162,514,361,527,17,14,211,130,328,11,383,123,84,183,38,57,184,152,205,494 }, +{ 92,126,107,7,356,493,97,279,359,298,16,246,35,60,14,441,362,121,43,423,5,132,392,20,508,230,199,146,232,173,150,414 }, +{ 15,82,141,515,291,922,349,700,217,260,372,120,351,93,77,753,318,352,373,854,1,326,269,21,13,102,144,202,64,23,203,137 }, +{ 141,217,352,115,180,13,269,317,752,77,23,21,341,197,5,372,244,291,9,64,51,102,4,1,365,2,165,33,3,48,237,351 }, +{ 78,47,390,19,130,453,108,27,711,813,730,444,412,283,196,690,123,14,128,26,250,389,650,236,200,65,51,4,34,183,297,73 }, +{ 34,250,297,80,472,64,495,17,311,3,148,45,667,61,176,53,243,27,90,161,469,141,483,151,62,128,29,4,58,56,5,231 }, +{ 51,23,33,13,551,77,102,326,421,21,523,120,5,899,453,692,202,153,308,615,115,958,450,401,791,68,221,93,475,18,403,4 }, +{ 
98,223,393,363,411,1,478,834,664,156,284,691,447,791,914,293,354,724,697,9,807,541,759,51,18,421,48,264,948,586,195,848 }, +{ 7,14,107,232,16,92,2,60,46,5,359,121,24,526,220,620,135,1,172,21,126,314,132,77,18,75,32,278,12,23,52,38 }, +{ 32,76,2,1,21,72,241,14,5,48,292,89,476,45,720,270,179,90,17,214,148,38,50,29,129,155,350,46,290,227,123,464 }, +{ 15,515,700,753,13,23,33,77,51,4,102,0,32,202,1,11,128,82,117,141,40,5,110,8,3,90,137,21,10,318,403,165 }, +{ 66,6,69,2,1,74,14,135,278,267,380,24,29,97,67,38,103,75,7,388,324,25,52,150,87,83,189,357,335,108,204,172 }, +{ 152,4,339,59,79,471,188,11,77,94,128,33,529,377,12,111,102,202,452,402,216,99,13,542,51,40,474,37,64,291,23,961 }, +{ 15,515,700,753,1,0,196,13,33,2,77,5,23,102,3,10,9,7,217,4,6,153,117,177,14,457,115,12,40,730,11,134 }, +{ 17,209,45,106,207,5,255,119,62,2,61,3,263,742,306,655,425,378,32,56,29,136,84,80,311,58,186,240,243,383,14,21 }, +{ 120,260,450,15,1,23,817,13,515,523,326,5,700,51,82,31,202,64,21,753,318,93,32,269,98,33,351,77,102,125,457,165 }, +{ 116,492,268,93,23,206,203,0,551,918,13,51,8,22,417,940,120,10,499,31,949,791,125,523,165,473,341,730,421,959,401,391 }, +{ 15,515,700,753,165,13,0,1,197,23,4,82,120,2,180,12,260,719,8,3,386,117,5,523,901,11,341,51,10,9,141,351 }, +{ 14,24,69,7,2,66,108,1,67,6,36,398,18,267,150,97,29,38,83,149,65,74,28,0,189,71,388,16,273,124,46,22 }, +{ 330,96,523,335,367,662,141,839,1,922,372,615,244,717,269,443,418,352,403,692,217,854,752,180,36,64,498,576,349,201,98,284 }, +{ 184,90,257,205,245,229,57,152,769,17,524,5,32,497,45,432,619,2,452,266,4,106,1,21,179,59,76,3,460,292,381,128 }, +{ 7,14,16,2,46,5,70,107,87,13,58,307,92,32,38,23,202,0,172,24,18,21,60,128,77,35,20,10,9,4,171,112 }, +{ 7,66,140,16,14,92,97,69,267,172,189,24,380,2,35,60,298,451,230,135,314,74,150,71,38,357,6,330,67,423,21,443 }, +{ 121,167,354,132,18,446,147,101,212,146,407,16,55,35,647,191,20,271,199,68,60,259,463,107,9,126,363,7,195,43,14,411 }, +{ 
76,90,179,32,205,21,184,460,257,288,45,245,316,5,57,152,241,2,358,1,229,72,524,148,48,769,17,4,12,38,14,720 }, +{ 147,259,178,878,427,465,581,198,786,798,142,534,325,929,20,362,35,132,107,376,43,5,279,77,49,146,70,202,590,771,33,14 }, +{ 473,93,450,778,141,30,855,466,144,203,330,530,88,523,459,372,201,617,839,704,254,321,934,326,39,36,82,717,332,213,559,403 }, +{ 523,475,51,899,730,453,23,719,403,33,457,13,421,386,4,120,117,196,102,153,15,801,450,817,515,260,202,11,700,99,165,125 }, +{ 15,1,13,515,0,2,700,5,23,753,4,3,341,317,10,115,180,11,33,64,217,77,117,165,197,7,6,365,9,141,102,134 }, +{ 19,4,119,40,33,202,27,84,102,56,77,73,504,485,26,494,757,63,862,59,23,300,25,12,128,11,5,13,342,880,469,6 }, +{ 32,20,2,13,5,21,23,6,12,38,43,29,64,7,95,51,61,207,48,147,90,178,17,182,49,0,115,202,52,362,37,22 }, +{ 339,188,11,79,4,94,377,12,99,111,542,102,37,33,474,51,471,40,453,152,77,13,59,403,342,23,117,57,475,134,128,38 }, +{ 34,128,283,176,495,231,318,432,503,275,529,527,161,53,3,202,56,291,585,469,73,17,14,412,57,27,80,245,250,381,402,51 }, +{ 15,515,13,700,1,217,141,120,23,180,753,115,365,51,317,341,77,260,0,291,110,137,202,5,21,269,64,36,349,2,4,10 }, +{ 13,15,961,515,700,753,4,12,2,457,3,11,197,51,37,569,115,23,5,0,99,10,1,134,6,111,165,33,72,40,38,79 }, +{ 15,515,700,753,13,1,0,2,23,33,5,3,10,4,9,115,7,102,6,51,12,217,77,11,40,457,569,341,117,317,14,719 }, +{ 5,76,2,32,292,214,45,1,129,519,123,179,90,710,17,29,460,72,14,207,21,249,58,205,464,263,618,48,6,245,3,257 }, +{ 72,76,32,4,21,12,38,23,99,54,89,3,14,17,51,57,11,90,13,488,179,2,59,148,45,37,5,115,401,1,10,421 }, +{ 98,223,393,1,834,264,284,791,724,293,478,772,697,909,363,682,905,447,541,821,411,51,421,9,807,48,765,31,730,96,386,410 }, +{ 341,13,509,8,23,638,165,901,762,10,569,242,391,197,873,642,506,499,629,961,15,180,116,456,206,546,417,1,338,457,515,867 }, +{ 1,2,5,50,14,38,46,114,0,36,29,22,218,65,86,96,137,21,133,285,12,10,323,181,17,58,51,23,67,7,28,6 }, +{ 
481,878,202,13,5,23,182,32,269,21,1,318,77,142,557,494,141,33,640,137,70,291,2,51,260,415,929,403,120,58,4,259 }, +{ 15,515,700,753,1,4,13,0,2,5,341,3,11,180,134,12,10,317,197,365,33,21,23,165,117,6,77,7,217,37,32,498 }, +{ 25,119,19,6,26,42,27,17,4,790,45,814,2,469,483,84,122,1,0,33,32,128,76,80,611,113,73,56,5,240,202,77 }, +{ 14,2,7,1,24,0,65,6,16,69,67,22,124,28,108,5,18,36,86,10,38,46,66,398,289,168,12,83,21,23,610,13 }, +{ 51,23,128,13,15,202,12,120,33,64,141,82,10,515,0,403,700,3,1,99,117,269,153,165,753,5,318,197,102,260,2,137 }, +{ 16,35,24,0,9,18,7,1,68,69,50,71,103,65,67,189,133,23,28,13,60,537,149,335,75,21,64,5,114,2,12,14 }, +{ 754,803,133,576,880,543,2,1,657,50,14,38,46,5,29,67,218,36,58,171,52,96,24,103,775,0,114,83,181,54,65,45 }, +{ 21,32,5,3,2,17,14,72,76,1,12,23,38,51,4,54,10,0,89,13,99,137,45,36,421,115,543,11,22,128,221,48 }, +{ 434,384,268,144,855,940,617,206,332,116,93,911,959,282,203,137,141,489,44,120,10,110,244,36,98,31,269,253,367,417,160,9 }, +{ 15,2,1,0,13,515,5,700,3,23,180,217,141,10,753,4,117,6,77,33,64,7,11,197,352,317,341,134,165,115,12,9 }, +{ 2,113,6,25,1,0,29,4,7,833,5,45,32,61,128,19,77,151,74,145,64,42,14,210,655,106,59,177,27,17,21,738 }, +{ 116,268,918,203,551,31,8,692,206,791,403,499,417,93,940,421,0,23,22,120,13,523,44,51,299,473,959,1,10,475,202,125 }, +{ 107,126,132,612,362,279,20,146,259,493,199,121,590,43,660,147,35,376,939,60,941,534,683,5,0,953,16,7,49,649,595,470 }, +{ 15,515,700,753,13,1,0,23,2,33,77,4,3,51,5,102,115,10,9,341,6,7,11,342,217,12,120,180,40,317,141,8 }, +{ 53,27,17,161,469,378,73,527,19,136,383,250,495,56,862,26,62,84,80,106,200,4,34,14,440,297,3,128,585,5,129,123 }, +{ 17,45,209,106,5,207,243,454,119,255,2,263,186,290,29,3,21,62,425,61,84,32,58,56,48,408,655,136,306,14,742,227 }, +{ 4,152,59,452,128,79,216,11,339,471,529,188,94,77,202,12,291,33,318,377,99,51,23,5,402,349,32,474,102,13,205,111 }, +{ 15,515,700,753,1,0,2,13,3,5,23,4,180,51,115,9,6,12,7,8,197,33,10,961,901,77,141,752,110,22,120,341 }, 
+{ 951,752,638,811,351,642,180,253,10,341,197,901,110,873,8,244,15,352,165,898,143,515,564,762,499,55,365,700,82,753,141,854 }, +{ 6,262,197,350,74,26,115,509,841,583,165,38,21,13,47,50,235,19,33,324,453,4,308,196,138,99,64,903,675,1,223,130 }, +{ 125,165,391,23,386,221,21,13,558,457,51,867,197,115,401,758,77,97,308,791,7,180,48,120,963,451,743,89,603,134,403,450 }, +{ 1,14,2,5,16,46,7,38,58,24,50,0,69,48,35,67,54,18,12,75,21,45,513,155,430,37,270,9,61,163,223,32 }, +{ 23,13,51,0,12,15,4,1,115,2,515,453,10,457,5,3,202,21,165,700,403,11,37,64,77,401,9,197,753,59,475,99 }, +{ 129,84,17,56,27,495,19,548,80,123,162,378,3,504,161,469,618,73,40,53,4,26,205,184,106,183,62,6,257,128,862,12 }, +{ 28,9,22,49,109,1,67,0,39,55,168,158,83,36,35,86,420,194,185,159,95,105,69,208,272,103,50,114,2,254,169,30 }, +{ 242,391,8,456,116,13,23,492,341,165,867,51,499,457,479,638,338,509,719,10,1,642,417,762,401,93,206,268,901,569,22,197 }, +{ 211,162,248,130,57,4,41,556,507,266,183,152,305,361,11,129,62,229,38,471,514,313,157,300,377,3,440,128,123,328,339,59 }, +{ 7,92,97,16,298,140,60,126,14,35,279,314,232,246,43,230,508,173,71,107,423,24,150,779,20,189,66,18,607,21,0,653 }, +{ 15,515,700,753,1,0,2,13,23,5,3,180,51,901,6,4,7,12,9,115,8,457,165,82,120,197,10,64,141,341,22,117 }, +{ 0,18,403,25,523,74,6,24,42,91,22,102,13,51,49,193,475,681,95,85,730,64,899,397,273,750,247,673,32,805,757,288 }, +{ 56,0,18,65,33,554,84,343,64,6,90,561,22,19,899,108,27,63,289,475,240,467,370,32,233,214,24,123,95,287,28,194 }, +{ 31,98,127,9,0,44,293,105,395,299,49,242,28,22,599,116,1,284,276,125,456,685,763,159,272,623,23,935,393,144,201,137 }, +{ 1,5,2,14,38,46,50,48,21,7,58,45,270,61,155,171,0,290,69,32,29,54,67,16,24,666,663,17,37,75,502,52 }, +{ 23,51,13,453,457,12,719,4,15,99,401,2,961,3,11,730,475,515,0,1,165,115,629,700,14,17,403,40,5,33,37,64 }, +{ 968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937 }, +{ 
2,1,14,29,67,103,6,46,52,75,24,133,38,218,83,309,36,108,70,114,96,5,238,74,25,26,220,236,65,50,69,87 }, +{ 7,71,16,92,24,60,14,97,150,140,35,189,149,298,18,230,43,508,2,423,69,0,38,314,66,279,399,517,251,20,232,273 }, +{ 23,1,120,51,13,202,77,141,260,21,15,5,128,82,2,450,269,165,102,318,48,32,137,515,125,64,12,115,351,180,33,7 }, +{ 77,13,33,23,64,51,4,102,141,128,40,1,2,202,0,6,177,115,137,15,59,10,11,7,269,22,515,180,318,3,700,95 }, +{ 101,9,18,363,264,520,411,604,676,682,905,271,16,821,167,0,621,364,39,100,121,118,166,781,647,252,1,848,447,265,404,60 }, +{ 144,203,326,382,166,418,93,88,96,822,1,141,859,77,744,438,110,269,921,367,521,274,100,39,494,120,403,473,217,576,13,291 }, +{ 13,21,180,125,5,23,191,32,18,16,146,199,115,24,165,118,0,225,22,1,60,197,64,901,375,241,48,12,408,71,522,818 }, +{ 15,515,700,753,13,0,23,8,1,51,82,102,2,33,4,9,180,165,5,77,10,110,12,197,120,260,18,326,351,403,22,457 }, +{ 33,77,102,64,13,23,128,51,141,202,1,40,0,2,117,10,15,4,6,318,269,134,22,515,180,115,177,153,137,196,3,700 }, +{ 174,544,104,525,74,0,151,25,6,624,29,66,2,636,81,45,204,177,64,416,7,644,5,138,222,319,355,77,22,122,789,216 }, +{ 141,304,372,352,291,947,177,269,128,954,77,349,217,202,64,318,498,437,102,864,86,13,115,180,137,5,210,197,32,950,678,7 }, +{ 161,200,53,17,714,27,34,73,472,62,585,56,440,383,136,78,527,19,4,3,106,361,14,250,80,514,377,84,322,390,862,548 }, +{ 32,76,72,21,38,14,89,54,12,37,2,241,5,428,17,1,181,221,350,45,3,4,449,90,148,179,99,292,794,770,477,46 }, +{ 33,23,128,64,141,13,77,51,102,202,2,15,1,3,40,10,5,153,269,515,165,0,117,196,180,318,6,700,137,134,120,22 }, +{ 96,137,30,0,9,39,840,202,669,406,141,530,613,1,180,88,22,160,679,576,28,403,31,219,49,228,829,100,36,15,10,856 }, +{ 180,141,352,1,15,752,115,0,217,365,2,515,13,901,341,317,23,4,197,700,269,5,3,31,753,244,21,165,253,202,51,44 }, +{ 1,2,67,0,28,50,83,65,14,46,103,114,24,38,36,9,69,5,18,7,22,133,55,218,16,124,29,54,96,160,12,480 }, +{ 
180,115,352,317,365,217,752,901,141,15,341,1,515,253,700,0,753,873,2,197,31,137,165,244,4,120,160,44,98,5,202,3 }, +{ 5,32,347,49,13,21,95,713,23,1,77,33,60,64,107,4,126,928,296,850,0,241,197,102,652,195,180,534,165,153,379,10 }, +{ 341,180,365,901,317,115,15,752,515,700,217,873,753,82,0,110,197,141,951,165,1,564,13,351,253,12,10,3,2,4,308,244 }, +{ 17,45,21,3,106,5,155,38,227,32,2,209,62,54,12,243,14,181,552,587,46,540,207,794,37,48,430,119,255,221,770,29 }, +{ 16,24,35,18,7,0,50,1,9,14,75,69,2,5,12,21,60,13,67,71,23,48,10,108,223,181,189,103,46,64,92,51 }, +{ 127,13,98,165,308,23,286,293,258,51,219,395,197,115,301,401,31,391,22,105,457,170,239,276,55,338,629,116,180,479,509,569 }, +{ 539,213,748,840,957,669,30,466,88,217,144,251,863,190,137,93,230,228,679,352,317,203,617,321,258,530,160,219,96,831,816,689 }, +{ 5,48,1,21,2,14,0,36,12,38,32,54,430,181,50,270,72,99,281,45,17,10,46,22,37,218,67,3,290,76,23,51 }, +{ 13,23,0,4,33,51,2,115,141,1,77,217,180,10,9,317,3,102,11,5,15,197,7,202,22,165,40,64,515,6,341,31 }, +{ 13,15,117,515,23,12,37,134,165,700,38,54,457,753,51,64,153,197,14,10,33,82,961,0,99,89,115,719,141,3,4,1 }, +{ 5,21,2,3,1,32,14,12,48,17,0,10,51,23,38,22,4,72,13,54,36,45,137,76,99,114,86,37,11,64,540,430 }, +{ 202,128,77,318,291,33,269,102,275,141,494,342,40,678,0,177,20,210,402,7,4,5,137,6,13,450,403,32,49,120,23,22 }, +{ 1,2,24,14,67,46,69,50,38,103,16,18,75,35,83,29,52,96,5,108,0,7,54,71,149,394,236,309,70,133,220,58 }, +{ 15,515,1,700,0,2,753,13,23,5,180,3,51,4,165,457,12,197,115,6,7,21,9,141,8,901,33,82,120,77,10,110 }, +{ 0,28,65,14,67,2,124,24,1,9,7,69,55,154,36,16,46,114,175,35,83,22,429,18,109,149,68,189,108,336,251,133 }, +{ 56,162,403,3,129,775,99,161,17,40,527,33,880,4,14,128,475,12,548,23,102,202,361,117,34,184,383,200,183,196,64,53 }, +{ 151,2,29,58,112,45,186,113,5,70,52,1,311,6,315,66,61,7,74,27,631,17,80,87,287,243,209,227,14,491,19,869 }, +{ 
6,1,74,2,75,29,25,66,26,70,52,138,67,324,357,42,19,220,14,85,87,108,38,451,309,103,24,69,380,135,114,65 }, +{ 15,515,700,13,23,0,1,120,753,51,180,2,260,202,5,141,77,102,9,450,115,21,197,165,7,137,110,33,12,269,901,4 }, +{ 5,45,17,2,14,46,48,38,181,50,155,3,186,54,61,29,21,227,281,80,540,106,12,400,52,1,58,32,328,171,209,487 }, +{ 16,18,265,121,158,35,60,9,39,7,329,105,252,68,24,1,132,167,159,22,0,49,286,101,21,146,23,327,120,709,5,14 }, +{ 108,467,283,56,389,650,123,412,33,177,899,475,216,453,269,349,619,65,51,730,403,670,23,196,523,128,84,13,401,789,503,543 }, +{ 514,3,11,377,328,4,361,507,57,403,14,880,130,485,176,215,236,38,152,102,211,56,62,757,54,585,300,556,34,555,40,229 }, +{ 3,555,62,266,130,99,507,139,514,12,152,229,215,305,57,40,440,33,403,471,38,56,475,14,361,313,775,328,196,548,123,23 }, +{ 120,202,318,15,77,13,1,450,33,269,515,260,5,128,494,51,23,700,102,141,40,753,326,403,817,137,523,21,177,922,342,7 }, +{ 15,1,515,23,0,13,700,2,51,753,180,5,165,21,197,12,3,120,115,4,141,6,9,7,457,33,386,202,82,8,31,341 }, +{ 15,180,515,82,351,700,10,317,753,115,217,365,141,898,33,901,13,23,110,854,752,77,1,197,4,341,143,36,64,352,102,9 }, +{ 104,289,66,707,214,90,712,64,97,173,20,0,414,194,874,43,32,7,568,560,65,38,426,312,715,192,376,74,835,5,324,147 }, +{ 84,56,0,554,63,65,453,249,123,643,18,26,847,475,511,403,416,561,524,289,370,73,9,19,45,42,719,194,27,467,33,730 }, +{ 21,346,13,350,308,826,197,101,352,68,570,0,165,23,9,841,115,100,509,694,221,230,35,217,569,88,124,749,1,777,212,154 }, +{ 16,92,7,24,60,18,35,140,126,14,50,71,46,330,2,75,246,5,121,267,571,1,230,309,220,0,9,64,146,236,54,108 }, +{ 82,15,515,898,365,700,180,33,341,753,77,901,10,115,55,351,21,5,1,4,13,102,36,217,2,165,752,120,197,117,11,317 }, +{ 16,24,35,18,69,71,140,1,103,7,189,68,0,50,9,108,2,133,60,267,230,46,149,67,167,118,92,14,75,21,191,38 }, +{ 60,71,16,18,7,20,43,118,35,68,375,28,608,0,175,566,154,92,14,149,628,33,22,13,2,10,279,23,107,356,55,117 }, +{ 
187,258,871,295,201,434,219,224,489,384,268,110,261,839,44,699,93,116,36,131,141,228,144,160,940,567,244,406,137,574,98,253 }, +{ 66,7,97,172,192,712,232,324,204,74,43,448,387,426,568,20,526,107,104,135,356,729,173,0,22,5,32,95,2,64,500,560 }, +{ 15,515,700,753,1,4,0,341,13,3,134,2,5,33,11,77,12,10,23,197,365,901,7,40,217,32,21,6,51,180,961,37 }, +{ 0,28,24,9,35,65,16,124,68,55,109,154,7,39,22,149,158,14,175,1,49,252,18,71,2,168,289,419,108,420,67,101 }, +{ 7,16,14,92,2,46,140,24,220,35,38,60,75,1,50,18,87,54,5,126,29,52,278,262,314,107,71,21,172,135,330,394 }, +{ 7,92,16,14,172,126,2,60,140,35,135,314,278,46,24,38,232,107,330,66,5,18,150,246,230,97,52,1,121,563,279,21 }, +{ 6,26,235,53,297,436,27,19,25,73,113,445,90,214,65,42,64,289,250,611,624,32,45,648,614,17,85,491,34,122,200,416 }, +{ 352,141,1,217,854,752,351,180,244,36,110,661,82,258,816,160,295,219,567,224,230,269,922,144,260,268,93,201,137,116,489,202 }, +{ 16,60,35,18,126,107,68,191,92,121,7,14,598,20,493,279,167,446,118,0,28,43,463,55,24,212,375,566,9,150,575,21 }, +{ 15,1,515,2,4,13,0,700,3,5,23,753,341,77,51,115,33,11,180,10,197,141,6,165,7,901,102,40,9,202,217,12 }, +{ 23,51,13,202,21,5,1,120,15,137,128,125,32,2,12,141,33,165,64,515,403,318,700,48,180,7,6,450,115,523,475,260 }, +{ 131,716,224,371,219,187,737,616,385,254,9,98,105,924,31,258,836,39,127,578,49,916,44,761,272,137,944,159,0,242,442,22 }, +{ 15,515,700,1,753,2,5,0,4,13,3,180,11,141,197,10,341,217,33,134,165,6,77,7,317,12,352,64,365,32,102,40 }, +{ 66,74,7,173,174,29,192,2,222,20,226,43,353,52,712,6,0,138,500,204,97,145,64,104,426,673,355,90,25,5,65,87 }, +{ 5,259,786,534,590,493,279,49,13,581,465,21,929,35,941,132,147,32,23,612,362,626,107,121,178,0,146,61,48,939,10,18 }, +{ 2,14,16,7,278,69,135,140,46,24,267,35,92,38,1,189,29,52,309,60,66,75,71,172,74,357,18,87,67,6,230,5 }, +{ 165,13,308,197,391,23,401,15,51,457,180,509,115,569,3,629,961,719,34,758,317,734,14,29,46,2,17,901,38,453,5,217 }, +{ 
1,22,2,14,0,28,7,168,67,49,65,24,36,95,5,105,55,35,12,46,69,16,114,159,194,50,10,9,158,83,164,109 }, +{ 34,453,3,196,130,14,322,11,47,51,377,236,361,4,730,153,514,711,57,440,62,17,161,108,176,59,485,56,162,412,202,117 }, +{ 18,16,21,23,48,13,24,35,121,5,156,60,51,1,7,132,141,221,163,115,0,271,447,340,363,202,125,71,2,781,22,698 }, +{ 165,13,457,23,197,961,629,569,341,41,12,38,401,901,54,51,115,17,15,509,421,37,62,45,719,57,32,328,117,758,157,99 }, +{ 2,1,77,141,33,64,3,102,0,23,13,5,128,10,6,15,180,202,269,40,51,515,7,165,137,117,318,4,700,153,197,352 }, +{ 68,212,0,124,101,9,154,16,562,191,21,149,65,24,35,1,118,167,818,350,520,100,722,841,264,71,13,302,478,23,375,346 }, +{ 98,23,48,598,13,293,541,21,125,121,51,807,0,31,35,259,126,7,386,1,223,783,10,107,199,20,221,144,342,963,49,64 }, +{ 21,13,5,586,1,23,167,48,33,781,647,49,165,18,51,271,77,32,761,118,0,82,391,22,146,141,459,31,197,156,115,4 }, +{ 2,1,5,61,29,7,58,45,14,6,425,32,70,52,290,738,207,21,72,112,66,76,655,17,186,46,64,263,38,0,128,87 }, +{ 39,265,9,100,1,333,363,101,18,411,447,254,166,310,31,98,264,30,639,404,156,286,16,93,593,203,272,682,0,905,44,821 }, +{ 6,2,1,19,29,51,26,108,25,74,5,23,14,114,13,386,133,103,42,66,453,70,309,138,719,324,65,38,64,96,52,75 }, +{ 20,43,356,107,49,858,595,7,414,359,0,5,392,319,97,612,422,819,14,376,173,246,22,470,147,427,230,92,197,33,683,95 }, +{ 0,9,68,35,65,67,114,101,28,1,124,175,336,69,154,103,83,24,189,133,39,16,50,7,2,149,55,251,18,345,230,36 }, +{ 23,13,51,15,0,1,515,115,165,2,5,12,700,202,4,21,141,457,753,197,10,3,180,120,32,9,318,11,453,64,6,269 }, +{ 121,195,60,16,126,107,98,271,146,407,132,35,1,167,199,223,493,191,279,20,18,5,43,7,21,92,48,393,0,362,212,467 }, +{ 31,44,299,116,393,144,492,456,268,22,105,0,367,918,384,434,127,489,98,9,963,125,242,948,1,28,206,49,36,51,93,293 }, +{ 23,13,457,51,165,401,719,758,197,453,961,629,308,14,15,12,730,3,386,569,391,29,739,515,34,828,832,901,115,514,670,341 }, +{ 
105,36,131,22,180,115,341,127,169,1,9,31,64,98,44,365,317,141,272,143,160,55,219,86,197,776,239,187,0,535,13,752 }, +{ 1,6,2,14,66,25,29,5,108,67,65,114,19,38,26,52,74,7,24,18,69,86,36,388,64,51,17,83,23,46,42,75 }, +{ 51,386,23,453,719,13,730,6,457,670,758,19,401,165,2,475,47,26,899,14,108,17,1,5,197,29,894,754,236,74,27,285 }, +{ 252,18,9,101,121,16,132,0,419,167,364,60,604,35,265,363,146,271,39,158,68,109,28,329,848,24,647,907,682,159,212,55 }, +{ 283,503,128,432,26,193,63,269,789,529,102,122,389,275,678,6,25,318,445,4,342,27,573,605,177,862,643,291,216,57,235,59 }, +{ 2,1,29,75,69,52,14,6,46,74,87,7,220,226,278,38,135,66,267,70,16,262,25,24,380,324,357,140,67,394,97,222 }, +{ 97,298,69,7,66,140,189,24,16,267,172,423,60,150,14,314,92,71,81,501,43,35,74,6,517,232,149,607,83,330,18,2 }, +{ 475,421,403,899,51,805,523,958,453,817,23,615,401,801,120,326,202,670,494,730,450,386,115,629,260,576,77,365,569,0,165,13 }, +{ 7,20,14,128,77,97,112,202,2,177,16,415,269,318,275,66,107,43,141,414,135,38,307,10,58,0,6,291,32,5,4,40 }, +{ 24,14,7,0,2,1,22,28,16,65,168,124,35,67,108,109,18,49,10,149,69,158,5,95,289,12,55,6,36,71,46,21 }, +{ 26,80,27,73,122,25,19,17,6,42,684,209,445,573,667,106,45,690,4,611,255,680,297,495,65,59,128,119,483,113,64,53 }, +{ 107,259,362,376,465,20,470,147,595,534,612,683,660,43,5,49,581,0,858,35,427,246,97,786,178,356,14,21,142,878,7,279 }, +{ 131,30,228,190,856,406,224,88,219,530,863,613,778,274,944,816,187,39,100,160,258,31,44,93,1,321,539,36,871,137,435,531 }, +{ 113,6,311,25,45,491,80,611,27,26,209,667,17,73,122,42,684,396,19,85,106,5,614,4,2,255,151,29,1,64,648,61 }, +{ 15,515,700,753,0,1,23,51,120,2,13,82,5,260,9,4,341,77,180,115,141,10,7,12,450,8,202,901,197,351,165,93 }, +{ 219,127,98,258,395,421,924,293,242,201,697,105,276,51,308,23,453,272,401,944,512,137,13,31,284,567,386,365,116,131,964,125 }, +{ 15,180,352,141,515,752,217,82,1,317,854,700,351,753,115,341,110,13,260,120,21,36,33,898,23,10,5,365,4,160,901,137 }, +{ 
129,123,17,257,162,184,205,249,183,769,5,80,3,4,229,130,119,45,90,99,618,106,57,497,12,128,2,84,59,152,27,40 }, +{ 33,102,23,77,64,128,51,13,0,202,10,141,40,15,1,22,117,137,2,86,4,403,269,153,515,196,65,11,700,115,99,5 }, +{ 7,14,2,16,172,107,46,92,5,135,35,202,294,87,38,232,29,97,20,21,24,1,60,220,66,43,12,0,126,52,54,70 }, +{ 403,576,615,523,475,326,805,817,494,421,51,202,120,450,137,453,23,859,260,401,402,77,33,670,0,958,15,197,386,515,165,480 }, +{ 141,352,217,137,0,180,202,349,9,269,23,51,115,291,77,372,13,317,120,752,365,351,93,22,2,341,64,10,82,854,28,18 }, +{ 1,23,13,51,202,141,5,165,21,15,120,180,64,2,197,125,33,102,12,7,137,515,48,128,269,318,93,700,0,403,9,4 }, +{ 25,151,6,145,122,29,174,45,113,74,4,665,42,138,2,614,416,287,19,348,746,0,66,26,1,7,64,243,311,396,81,624 }, +{ 30,190,254,166,100,382,731,829,88,131,264,795,9,93,625,274,438,1,578,613,716,31,44,39,530,36,616,921,265,203,160,77 }, +{ 132,5,21,13,1,23,32,195,379,687,156,121,626,296,48,70,850,146,51,82,883,771,35,49,652,407,60,4,260,0,845,33 }, +{ 9,254,0,49,272,131,39,159,688,101,105,578,518,158,286,28,327,333,68,224,252,219,344,16,22,1,716,31,30,228,24,890 }, +{ 16,7,35,60,18,20,14,68,9,0,28,118,43,92,126,55,107,2,101,154,24,71,5,202,121,109,22,252,21,97,1,621 }, +{ 15,515,700,753,13,1,341,2,0,4,3,5,11,23,10,33,117,12,901,197,6,134,77,8,165,317,21,365,217,7,17,40 }, +{ 78,19,444,47,26,390,27,453,130,813,108,730,711,65,412,122,51,680,113,235,690,196,630,283,128,236,14,64,73,53,200,445 }, +{ 2,7,29,5,61,6,45,1,66,113,112,14,52,315,738,128,32,151,74,16,20,64,70,21,592,0,25,4,425,43,491,222 }, +{ 145,112,74,66,6,29,26,70,19,396,25,87,2,287,135,151,138,222,5,226,42,122,7,307,1,644,45,58,113,651,635,632 }, +{ 92,16,7,60,126,24,140,35,14,232,18,121,246,71,46,267,172,150,107,314,132,146,230,2,278,108,330,199,236,5,38,572 }, +{ 13,115,197,538,569,341,98,55,165,127,365,762,219,286,844,23,170,206,734,638,535,901,169,253,629,0,873,509,180,10,332,258 }, +{ 
58,151,74,53,287,27,29,396,6,70,2,73,5,52,112,26,651,1,297,113,17,75,19,45,334,445,145,34,315,549,436,331 }, +{ 214,289,90,874,104,751,64,65,312,835,204,249,750,194,74,81,875,32,519,288,348,0,174,247,636,715,138,192,784,6,524,280 }, +{ 9,39,28,35,30,0,166,49,1,175,439,158,64,346,36,101,67,364,86,88,274,100,168,55,23,10,420,22,190,141,505,180 }, +{ 341,901,15,515,700,753,1,365,10,0,569,180,2,197,115,31,165,3,5,4,44,22,317,13,9,951,23,253,116,143,762,93 }, +{ 120,202,77,450,260,15,128,318,102,515,494,13,817,700,269,5,403,51,1,33,23,753,82,326,141,342,291,137,21,523,351,32 }, +{ 13,115,241,64,180,32,125,197,165,4,118,22,21,23,16,247,237,28,225,191,95,141,167,5,0,341,288,35,459,18,177,24 }, +{ 16,24,35,14,1,2,7,69,18,46,60,50,267,140,71,189,108,38,75,92,0,5,9,230,67,21,309,335,54,236,394,220 }, +{ 15,515,700,753,898,180,901,341,197,638,10,165,33,1,115,4,77,365,317,13,102,217,117,0,5,2,253,3,82,569,21,752 }, +{ 193,523,18,84,56,730,233,65,4,817,90,33,643,403,91,511,453,240,59,11,214,51,719,196,153,475,32,123,64,847,102,561 }, +{ 112,29,151,2,74,6,66,7,222,145,287,45,5,624,52,25,113,416,58,122,19,70,186,204,4,87,644,549,337,884,32,0 }, +{ 13,0,23,2,1,15,33,3,77,515,141,5,4,217,10,51,64,180,700,115,6,117,11,7,753,40,102,165,197,22,317,153 }, +{ 28,0,1,67,65,9,2,114,83,69,103,50,36,22,55,24,46,14,124,109,35,7,16,38,133,160,389,323,18,12,154,5 }, +{ 121,132,18,167,271,146,101,363,621,9,411,647,16,354,520,60,212,932,1,806,55,0,195,446,68,35,31,364,777,252,407,118 }, +{ 26,6,85,396,122,624,25,19,42,445,64,648,573,416,174,680,665,214,45,348,90,65,194,145,113,881,138,289,112,436,297,544 }, +{ 16,146,18,92,24,199,60,71,121,126,35,108,156,953,271,674,132,7,32,640,360,246,649,118,21,95,5,517,14,9,1,314 }, +{ 51,13,23,453,475,730,719,15,457,403,64,115,33,95,4,523,3,12,21,6,899,102,5,128,401,202,11,141,308,515,22,125 }, +{ 151,396,6,53,27,113,58,26,73,112,74,287,45,29,297,19,145,70,138,445,315,436,34,2,17,573,5,61,549,491,1,80 }, +{ 
223,1,888,774,260,98,269,385,349,202,96,141,421,622,730,863,318,697,87,453,393,418,922,834,751,5,163,335,120,291,352,30 }, +{ 16,60,92,35,126,121,7,150,246,18,107,1,598,24,167,195,14,97,71,279,98,441,191,199,517,146,356,223,298,271,230,0 }, +{ 22,1,105,28,239,170,0,55,95,31,36,301,2,320,98,127,9,49,44,64,35,67,10,86,5,12,109,23,168,13,21,312 }, +{ 2,6,5,207,292,76,1,119,45,32,17,29,61,306,790,58,240,106,14,64,214,151,476,710,7,72,84,128,4,179,70,25 }, +{ 51,23,221,254,115,13,438,530,125,48,21,39,541,960,386,49,1,613,15,840,228,308,627,131,688,401,5,326,421,158,165,83 }, +{ 1,5,2,0,12,22,21,36,10,14,48,86,23,13,32,54,3,4,28,65,51,50,137,37,208,114,9,38,17,7,281,202 }, +{ 363,23,447,182,296,340,1,93,698,478,379,156,284,144,18,269,21,98,141,70,668,411,664,658,110,914,67,937,180,691,335,291 }, +{ 17,32,45,498,41,115,180,197,106,62,54,38,546,165,13,155,468,509,341,243,241,217,542,15,57,536,428,51,117,721,292,129 }, +{ 32,95,64,246,22,92,180,13,5,652,125,241,638,237,7,49,4,126,21,115,197,296,888,316,0,165,774,23,16,392,1,534 }, +{ 15,515,700,753,33,341,13,217,4,141,77,23,180,317,1,10,102,351,82,115,40,5,854,21,137,11,352,901,365,117,197,0 }, +{ 15,120,1,82,93,217,515,260,77,141,13,110,700,351,352,23,180,753,21,854,202,317,64,349,269,51,165,137,5,128,291,36 }, +{ 13,23,51,141,77,0,33,4,115,64,2,10,102,202,217,128,1,177,269,11,7,22,6,21,32,9,180,40,15,3,165,318 }, +{ 478,264,1,520,98,724,9,682,223,664,21,759,13,772,604,100,23,363,411,48,821,5,0,905,909,447,31,265,88,101,166,39 }, +{ 20,29,7,2,77,416,6,128,33,5,0,113,104,32,43,13,491,66,23,21,102,51,74,210,202,525,64,318,10,81,174,14 }, +{ 2,1,5,14,7,58,61,29,45,290,46,38,52,21,32,270,6,592,425,0,75,155,16,48,17,50,72,70,207,24,263,663 }, +{ 80,6,17,209,106,26,483,113,19,469,255,25,378,27,495,833,45,64,161,2,61,667,76,742,32,90,445,5,814,65,887,119 }, +{ 98,223,393,1,354,834,195,791,447,697,284,293,360,541,781,156,51,807,18,664,421,411,163,668,48,31,591,765,883,386,948,23 }, +{ 
679,141,816,36,93,406,876,144,228,137,1,180,669,21,332,251,5,269,116,187,96,351,202,752,317,64,203,831,574,466,855,345 }, +{ 15,515,700,13,1,753,2,0,23,341,3,5,4,10,51,11,33,165,6,7,115,197,12,64,180,153,217,77,9,569,901,317 }, +{ 13,23,202,51,5,21,403,15,120,64,1,450,128,141,12,523,33,165,494,125,2,515,269,7,48,102,318,95,260,180,453,197 }, +{ 16,18,24,60,71,92,146,246,199,35,140,7,9,118,121,108,167,230,126,132,0,640,156,14,68,133,267,360,649,271,64,55 }, +{ 269,141,678,177,202,77,128,318,33,947,40,120,291,349,102,137,64,352,210,864,461,498,13,342,196,23,275,450,954,0,205,111 }, +{ 16,24,92,18,71,60,35,7,108,191,167,246,140,14,126,21,1,68,150,118,149,388,399,9,273,0,121,796,230,48,212,517 }, +{ 2,14,1,29,46,75,52,70,69,171,38,7,58,163,16,5,24,220,67,112,223,54,50,409,155,35,267,186,151,334,394,140 }, +{ 9,252,100,265,166,39,88,404,329,0,1,520,382,812,101,593,264,274,604,676,30,118,68,553,18,664,363,23,639,865,21,411 }, +{ 16,18,35,24,0,60,158,7,22,68,14,49,109,159,55,9,28,71,2,10,5,105,1,118,329,13,344,23,92,20,21,126 }, +{ 15,13,515,700,23,0,753,1,51,2,4,10,77,5,3,197,115,165,961,202,9,457,180,12,141,22,33,120,6,11,318,31 }, +{ 160,93,251,137,317,1,180,36,120,217,345,752,617,352,332,10,96,531,498,318,365,202,141,269,816,341,901,679,143,35,83,968 }, +{ 6,25,42,128,19,59,122,4,85,26,611,27,269,233,45,0,343,91,318,80,11,177,283,73,33,614,2,77,64,138,445,216 }, +{ 95,64,74,7,32,81,51,204,0,20,237,65,56,38,91,23,207,180,347,343,29,6,511,52,49,10,25,18,554,370,14,312 }, +{ 202,120,326,260,450,817,494,318,137,403,128,77,523,553,859,5,704,1,15,23,13,576,7,16,615,51,682,291,515,0,21,234 }, +{ 20,43,107,356,362,126,595,92,359,7,422,319,493,16,858,5,392,246,414,683,60,0,35,945,441,21,259,819,49,97,279,173 }, +{ 25,42,6,77,33,102,0,122,4,690,29,483,210,27,21,19,2,300,18,648,680,119,117,59,1,10,342,12,26,153,91,684 }, +{ 31,44,299,116,125,242,456,599,22,393,0,144,492,28,268,1,9,963,301,105,367,36,127,170,384,434,206,98,918,10,13,93 }, +{ 
410,521,686,367,662,88,335,321,201,96,98,772,144,1,934,921,443,435,284,274,264,551,120,897,44,100,33,225,744,418,909,960 }, +{ 142,178,878,234,132,786,195,202,77,416,147,929,146,522,167,259,687,639,450,271,626,481,590,5,198,212,771,49,0,465,315,427 }, +{ 254,39,131,9,272,0,578,716,310,224,30,49,105,827,518,829,166,333,616,228,613,846,101,219,1,31,890,98,159,938,252,100 }, +{ 230,699,854,473,450,351,831,137,855,217,352,704,800,202,251,498,160,144,206,203,317,201,253,752,418,141,1,332,82,180,443,36 }, +{ 403,202,475,453,494,23,51,77,318,402,13,33,128,102,137,141,120,342,269,0,450,4,899,576,40,421,275,117,217,177,196,64 }, +{ 23,44,98,182,291,144,116,39,110,141,96,82,905,70,367,264,125,93,77,411,120,1,658,202,100,415,107,363,197,30,447,105 }, +{ 15,515,1,13,700,23,77,120,0,753,51,180,202,141,260,5,21,115,2,137,128,9,450,197,365,269,12,326,110,102,318,7 }, +{ 0,32,18,95,207,577,193,29,61,104,64,784,715,102,693,887,81,91,583,671,403,5,52,474,397,180,138,49,37,344,38,263 }, +{ 0,101,9,68,252,16,100,39,166,364,124,24,154,265,212,88,18,35,329,419,28,118,71,30,65,158,191,55,1,694,21,676 }, +{ 16,24,191,18,35,71,167,118,149,68,212,9,0,1,21,108,101,92,60,375,302,7,589,755,124,674,350,48,562,246,13,363 }, +{ 2,14,69,24,1,67,46,16,38,103,29,267,7,35,189,135,278,71,108,18,83,309,52,6,149,388,75,236,60,0,150,66 }, +{ 13,1,23,0,4,2,51,15,180,33,3,115,5,515,141,10,77,700,11,9,197,341,202,165,217,102,22,7,753,317,365,6 }, +{ 447,1,698,411,31,363,98,5,919,4,156,125,759,691,13,64,459,354,44,21,48,293,30,914,478,225,82,120,2,922,848,839 }, +{ 854,82,351,217,141,180,352,15,515,752,1,700,317,898,753,244,10,21,922,115,77,36,4,260,64,110,372,13,5,365,120,11 }, +{ 0,1,4,13,5,2,82,33,3,120,10,23,9,11,77,260,21,102,8,31,40,6,351,51,64,450,22,117,93,110,7,457 }, +{ 51,13,403,23,12,475,1,2,21,5,453,523,115,202,817,7,0,99,3,6,450,120,494,64,22,95,49,899,10,37,32,141 }, +{ 180,752,352,141,498,864,317,217,9,0,115,237,230,39,30,197,83,1,930,64,35,365,372,13,579,88,702,36,101,901,482,21 }, +{ 
16,24,0,7,22,18,28,35,14,158,71,2,109,60,1,168,49,154,124,68,10,55,92,118,159,9,5,747,95,105,65,6 }, +{ 15,515,700,753,0,23,1,13,341,51,120,77,141,4,137,33,260,82,202,9,180,5,351,2,291,10,11,901,21,115,40,128 }, +{ 24,68,35,149,18,16,0,7,9,14,189,108,69,65,67,1,71,2,118,28,140,101,114,336,230,124,175,133,46,55,251,154 }, +{ 13,308,197,115,125,9,165,237,391,23,509,569,546,28,49,629,22,338,317,254,749,180,468,159,903,386,217,352,558,39,36,734 }, +{ 64,90,32,217,77,4,141,216,172,312,128,13,86,33,597,147,352,95,5,115,875,22,59,11,102,14,182,437,97,177,874,707 }, +{ 1,22,36,0,105,28,2,67,95,49,55,5,239,12,86,9,83,170,312,64,31,21,23,10,164,50,114,159,208,13,7,320 }, +{ 9,18,16,0,159,105,101,252,49,310,24,68,35,39,265,272,7,1,60,28,455,890,329,557,118,286,55,137,327,167,5,13 }, +{ 129,123,214,249,618,17,5,257,205,184,460,76,2,162,769,245,90,106,128,45,119,1,183,4,3,12,179,64,6,229,99,209 }, +{ 51,453,719,457,23,13,730,899,475,386,4,15,11,12,670,196,515,523,961,401,153,3,700,99,753,117,403,32,120,165,57,0 }, +{ 173,66,192,204,20,74,104,636,7,43,289,426,825,712,560,214,81,750,65,97,707,0,90,414,64,348,32,500,22,861,95,6 }, +{ 13,23,1,51,5,21,141,120,202,15,165,2,515,180,12,125,0,64,82,700,197,269,32,48,260,128,115,93,9,137,33,753 }, +{ 200,34,322,78,472,390,27,714,19,14,136,161,453,176,236,444,59,3,62,128,108,57,283,862,73,53,47,17,412,813,4,56 }, +{ 33,347,66,204,426,498,56,172,97,95,5,681,546,22,10,0,135,180,4,241,19,174,6,353,263,21,7,370,42,197,27,808 }, +{ 131,224,219,187,385,371,258,442,254,737,31,98,836,127,924,944,44,871,908,716,39,827,201,574,116,137,36,1,276,242,578,616 }, +{ 1,15,2,180,4,141,13,515,0,5,3,115,700,901,341,23,217,352,753,51,197,77,317,33,365,752,165,21,6,7,269,93 }, +{ 370,91,718,74,81,510,397,66,636,240,355,84,138,511,18,278,6,681,701,289,90,5,214,582,64,104,0,643,192,65,750,32 }, +{ 15,515,700,753,13,120,1,0,165,2,197,23,260,180,4,82,51,386,8,5,12,10,3,141,351,341,326,9,450,7,64,6 }, +{ 
32,2,76,5,1,292,72,45,476,214,21,241,29,14,17,48,129,90,179,460,464,123,290,148,519,205,3,263,249,38,710,89 }, +{ 13,165,115,17,197,569,23,509,457,45,32,41,106,180,62,38,659,734,155,536,341,629,961,873,587,54,431,37,391,99,405,428 }, +{ 68,24,35,16,0,101,9,124,154,71,149,65,18,175,28,118,7,55,302,108,92,14,22,346,1,39,429,252,375,364,10,67 }, +{ 0,9,1,68,230,65,35,69,83,23,101,13,141,67,217,352,21,39,16,88,28,124,212,100,115,154,51,64,30,36,10,317 }, +{ 100,265,88,909,410,382,812,593,1,213,321,30,252,230,352,264,9,166,689,39,676,98,21,466,724,639,478,217,13,48,553,101 }, +{ 113,61,198,904,43,0,5,37,899,325,20,59,33,523,204,725,817,389,470,329,222,40,174,58,22,453,690,848,122,104,788,105 }, +{ 16,24,141,18,7,0,71,140,35,269,75,352,12,9,108,217,5,330,60,64,199,70,22,13,486,246,318,133,65,50,23,498 }, +{ 271,167,121,60,18,191,146,199,16,1,446,132,575,212,463,354,126,35,598,566,727,98,107,21,608,955,640,407,5,24,223,68 }, +{ 15,515,115,217,700,13,317,753,141,180,33,23,110,120,4,341,82,10,1,260,365,36,64,854,351,21,51,352,137,77,40,0 }, +{ 173,7,97,356,43,107,20,387,729,104,426,232,560,595,359,392,414,707,885,81,5,0,66,858,612,49,861,14,22,32,819,230 }, +{ 15,515,700,753,13,23,51,82,0,33,165,120,196,4,1,2,197,453,260,351,180,12,40,8,386,110,5,326,9,141,217,457 }, +{ 9,100,120,30,77,795,137,82,202,39,264,827,578,127,0,166,373,318,18,326,141,260,1,450,731,31,33,395,217,291,341,254 }, +{ 14,2,67,1,24,69,0,28,65,7,46,18,114,108,36,83,38,398,9,16,124,133,103,154,50,55,22,267,29,160,35,547 }, +{ 14,7,69,24,66,16,2,267,189,67,71,150,140,97,18,60,172,35,6,1,38,149,388,92,83,135,108,74,462,380,29,36 }, +{ 15,515,700,753,1,13,2,0,4,341,5,3,23,365,11,117,180,10,12,33,134,115,77,197,217,165,6,7,317,102,21,9 }, +{ 1,22,0,12,5,2,36,28,21,10,86,13,23,49,128,9,95,51,55,96,208,141,48,202,4,137,37,64,105,3,50,7 }, +{ 605,630,63,123,736,650,65,108,444,368,561,389,19,25,42,619,122,194,183,27,53,33,84,26,297,813,114,73,256,235,249,216 }, +{ 
127,39,9,0,31,371,98,254,1,385,395,44,30,836,187,131,100,116,284,578,299,166,28,21,737,16,276,272,23,49,137,935 }, +{ 2,61,6,29,45,151,1,655,7,207,32,5,112,425,17,76,833,4,14,64,58,106,119,25,113,128,72,52,70,21,292,790 }, +{ 2,5,17,14,3,29,23,27,13,401,46,6,51,58,1,453,45,53,34,52,133,19,236,26,181,114,99,366,151,108,218,38 }, +{ 31,22,170,1,301,44,127,98,36,28,55,105,239,0,338,116,512,299,293,125,86,10,242,395,2,13,9,64,841,23,95,685 }, +{ 1,21,2,14,5,32,48,50,38,270,46,76,290,72,45,54,17,0,155,221,263,207,37,281,430,3,89,12,181,408,36,67 }, +{ 17,106,119,378,84,240,62,80,383,136,306,3,56,790,742,5,207,504,64,440,32,128,45,2,123,209,14,4,61,57,297,667 }, +{ 13,15,1,515,23,2,0,700,4,115,3,51,5,10,753,180,33,341,217,11,165,317,365,197,6,77,40,64,22,9,7,117 }, +{ 772,335,96,744,1,367,662,686,652,897,303,264,521,31,225,410,141,520,260,116,64,44,321,98,144,88,919,966,340,269,349,284 }, +{ 1,0,5,28,36,2,12,22,83,67,65,50,24,14,9,96,21,218,18,114,48,281,54,10,7,160,181,103,37,23,133,99 }, +{ 25,6,145,42,138,81,174,348,525,544,26,74,85,280,287,648,746,91,66,0,29,396,204,64,636,90,122,194,355,104,65,233 }, +{ 16,24,0,18,28,158,7,35,49,22,68,159,55,1,14,109,105,2,9,71,65,154,124,95,424,344,60,239,118,577,21,10 }, +{ 352,854,699,230,93,689,137,144,217,160,251,36,669,202,351,120,617,855,752,203,332,82,450,180,141,748,831,30,258,201,1,816 }, +{ 15,13,515,700,1,2,0,753,23,5,4,3,51,33,10,115,11,317,217,77,180,341,117,165,6,134,197,153,64,9,102,7 }, +{ 104,20,43,173,66,319,0,77,202,7,198,5,97,580,355,74,2,204,174,52,712,234,426,155,102,192,32,4,500,337,226,904 }, +{ 13,23,15,51,1,515,0,2,5,700,141,4,753,165,115,12,3,21,457,10,180,269,32,6,197,202,9,7,120,11,77,33 }, +{ 15,515,700,753,0,1,13,2,23,115,4,317,8,3,5,51,9,341,10,217,22,365,33,457,6,180,77,901,197,120,18,7 }, +{ 15,515,700,753,1,0,2,3,4,13,5,141,23,16,82,217,457,10,365,180,9,317,51,21,269,898,64,202,11,12,318,341 }, +{ 2,5,1,14,50,38,29,17,114,46,133,3,45,21,58,171,181,36,218,12,6,52,0,48,137,65,361,23,155,4,285,51 }, 
+{ 0,68,9,65,101,124,35,212,16,149,154,100,24,1,114,336,67,589,252,39,71,189,69,562,18,13,30,398,118,88,265,264 }, +{ 1,2,14,22,0,7,67,65,28,36,24,46,168,5,86,69,38,16,49,12,289,10,194,50,83,114,95,6,18,23,55,158 }, +{ 13,23,15,51,515,0,700,4,1,753,2,10,115,961,457,12,33,11,3,5,197,9,165,77,102,403,453,40,64,22,37,59 }, +{ 15,515,700,753,0,1,2,13,23,5,51,901,8,9,180,3,7,82,4,120,12,10,719,341,6,31,141,457,197,22,115,93 }, +{ 100,252,88,101,0,265,9,724,48,1,21,352,213,676,410,382,321,230,30,329,593,909,39,812,553,217,23,689,520,264,166,419 }, +{ 2,5,1,58,171,14,46,50,29,52,45,38,186,155,67,54,151,281,334,61,48,96,17,181,103,400,502,227,21,223,12,69 }, +{ 23,120,13,1,202,141,51,21,165,128,260,15,5,269,137,64,33,180,82,318,93,197,77,326,515,125,110,700,450,2,32,48 }, +{ 341,197,10,901,13,15,8,638,569,515,479,23,180,873,700,165,143,642,0,961,753,951,1,115,509,499,116,12,498,242,82,206 }, +{ 15,1,23,13,515,21,120,51,2,141,202,700,5,180,165,0,753,197,12,7,33,260,352,137,269,4,82,128,48,9,110,6 }, +{ 2,29,50,58,1,6,5,52,14,262,17,46,27,53,151,34,171,74,324,26,38,309,45,113,19,96,287,396,223,67,73,583 }, +{ 13,23,141,51,4,202,0,115,77,2,33,217,5,317,180,64,10,269,3,9,15,21,1,128,102,137,318,11,352,515,22,31 }, +{ 1,67,0,24,50,5,14,18,16,69,2,9,103,35,83,12,96,28,54,7,58,223,21,46,281,48,65,181,22,38,36,108 }, +{ 13,23,141,51,77,64,202,115,33,102,128,4,0,269,10,21,217,32,180,318,9,137,2,11,22,291,7,177,16,31,165,197 }, +{ 317,115,180,365,873,498,217,341,13,752,482,197,569,352,1,901,36,23,457,468,165,346,546,143,509,134,579,876,868,2,332,21 }, +{ 184,257,205,229,152,17,57,497,266,432,452,524,5,619,381,32,4,90,2,12,313,128,45,59,245,106,3,471,129,769,339,214 }, +{ 13,23,1,0,15,2,4,515,51,3,10,33,5,700,115,180,753,77,11,365,341,217,9,6,197,7,102,165,317,40,22,64 }, +{ 626,70,771,687,379,846,767,761,518,878,82,481,31,786,49,591,178,163,407,44,87,13,845,125,590,371,195,120,98,557,937,351 }, +{ 
264,1,410,909,772,897,686,521,335,478,98,96,691,639,100,44,284,382,31,321,744,88,914,724,662,765,223,9,682,363,0,367 }, +{ 13,23,1,2,0,15,51,515,5,10,4,33,115,77,180,700,3,141,217,40,6,753,317,197,64,165,7,11,102,9,341,22 }, +{ 141,77,13,64,269,23,115,21,318,217,5,202,102,33,137,2,15,291,177,51,48,180,32,4,515,352,128,7,0,10,96,11 }, +{ 13,15,23,515,51,0,700,753,1,2,4,10,33,11,961,453,115,40,457,14,12,3,9,5,165,401,197,77,22,21,64,102 }, +{ 1,22,0,36,2,31,5,12,13,105,28,9,49,86,141,21,23,95,128,55,44,115,170,10,164,98,180,4,137,239,83,51 }, +{ 100,101,88,0,252,9,265,30,21,39,759,724,213,329,321,13,419,68,562,382,676,352,694,35,553,410,1,166,909,593,230,23 }, +{ 539,88,30,190,321,530,840,144,669,435,957,748,778,100,96,418,203,213,1,131,410,228,466,274,36,382,219,863,613,83,822,352 }, +{ 7,97,92,173,298,107,43,314,232,140,16,356,20,387,729,362,126,359,246,14,230,501,426,441,0,5,560,66,104,779,35,60 }, +{ 2,14,7,1,58,5,46,16,38,70,75,45,24,155,29,0,21,52,61,163,220,50,69,270,35,48,32,171,18,6,64,54 }, +{ 447,411,363,664,647,98,621,1,354,271,223,478,18,777,781,936,360,759,167,132,121,48,21,156,9,195,118,293,23,691,13,264 }, +{ 2,1,14,65,36,67,0,7,46,22,69,5,38,24,28,6,83,29,86,114,168,50,124,208,12,18,108,10,194,484,103,16 }, +{ 421,386,51,791,730,958,165,801,23,453,697,403,615,13,221,523,24,899,401,326,551,670,576,102,18,33,125,77,566,115,203,197 }, +{ 104,319,422,945,0,81,20,43,715,32,784,693,879,7,397,74,306,207,52,681,671,2,61,173,6,636,904,95,887,5,18,192 }, +{ 2,29,1,46,14,52,70,262,6,26,50,67,75,96,309,38,103,112,58,19,5,163,145,83,74,220,223,357,24,69,331,25 }, +{ 786,929,590,771,687,626,941,178,465,259,70,5,13,21,35,534,107,518,132,49,878,48,146,121,379,279,31,767,147,195,108,125 }, +{ 5,2,186,45,17,29,48,50,14,61,46,155,400,1,227,171,52,58,38,54,430,209,80,281,3,106,536,311,181,243,21,502 }, +{ 0,32,64,95,817,494,342,403,207,202,194,389,453,365,312,180,316,5,690,237,848,577,450,61,102,523,475,289,49,241,65,482 }, +{ 
23,13,51,15,202,515,1,0,2,12,115,4,700,5,165,753,77,457,21,141,128,10,3,64,403,32,197,318,9,11,33,117 }, +{ 119,2,6,76,5,17,45,292,306,240,32,1,19,84,64,61,4,209,710,80,26,0,106,27,214,25,128,129,29,179,3,113 }, +{ 15,515,700,1,753,0,13,23,180,120,51,2,5,33,165,197,9,450,7,260,115,523,4,12,202,141,82,77,21,102,8,6 }, +{ 6,138,74,280,222,85,66,226,25,42,87,204,64,337,29,135,95,174,235,26,145,65,19,32,792,294,112,52,256,2,5,22 }, +{ 39,9,0,101,333,158,49,252,310,254,272,68,16,18,159,286,344,455,30,109,627,327,24,105,419,100,364,22,35,1,329,709 }, +{ 21,5,32,14,2,1,38,72,76,54,17,3,48,221,270,0,45,46,12,181,37,89,36,50,540,290,430,10,4,741,99,23 }, +{ 120,137,202,269,141,260,318,450,922,494,77,291,82,15,5,351,128,1,515,326,64,854,700,352,342,21,753,678,349,32,523,90 }, +{ 16,24,68,35,71,18,149,118,191,167,9,0,212,124,65,246,7,67,140,189,399,101,133,60,1,108,267,114,69,92,695,154 }, +{ 28,0,9,67,1,22,109,36,55,65,194,114,39,83,49,69,2,35,103,50,158,208,86,420,168,289,505,24,7,185,5,323 }, +{ 147,325,198,427,142,178,202,798,5,376,20,318,259,43,120,450,77,234,534,904,470,465,878,725,329,14,315,0,260,858,70,61 }, +{ 28,0,65,9,109,1,55,67,35,22,24,39,289,7,175,14,114,2,158,124,420,194,68,16,336,36,49,69,168,570,154,505 }, +{ 141,180,13,115,1,23,4,269,2,202,0,317,217,51,15,5,21,352,77,318,3,752,197,10,165,365,137,341,9,515,33,64 }, +{ 15,515,700,0,753,1,2,13,23,9,51,5,4,901,33,7,8,3,12,93,180,120,197,6,82,341,10,141,22,260,457,115 }, +{ 6,74,66,1,25,75,324,380,278,26,138,85,135,500,87,42,220,841,97,350,29,19,70,226,38,21,52,606,235,889,2,14 }, +{ 13,23,51,12,4,15,453,0,457,1,403,165,115,3,11,2,64,5,401,10,515,37,202,33,40,32,99,475,197,700,308,17 }, +{ 788,180,5,83,693,319,314,4,32,21,17,11,817,3,510,498,33,12,24,104,814,120,64,117,306,804,523,450,288,160,102,43 }, +{ 15,1,515,13,2,700,23,0,4,753,3,341,5,51,33,11,10,6,77,7,115,102,180,165,141,9,197,217,901,40,12,64 }, +{ 
101,18,9,167,520,16,0,118,60,212,604,364,694,24,55,252,68,917,264,35,1,121,146,363,39,100,806,5,21,166,191,28 }, +{ 13,23,0,33,51,141,77,4,64,2,115,217,9,102,7,202,21,10,180,3,15,128,5,269,6,32,11,16,165,352,22,317 }, +{ 66,135,97,74,172,6,278,7,204,324,138,174,29,85,2,87,25,140,92,192,52,38,802,69,448,500,808,620,22,1,280,232 }, +{ 2,24,69,6,97,7,1,0,14,298,423,66,67,29,150,25,189,267,124,74,607,18,36,81,172,33,83,38,52,273,71,809 }, +{ 0,9,158,39,68,49,109,16,24,333,35,344,101,22,159,254,272,30,124,65,28,18,793,154,310,252,327,105,627,419,286,55 }, +{ 15,515,700,10,753,33,77,180,4,341,1,197,13,115,365,23,901,317,5,102,11,217,165,117,141,40,2,3,253,21,134,55 }, +{ 100,166,382,478,265,264,88,39,98,1,404,274,9,593,724,921,639,438,363,682,411,31,30,812,96,447,821,905,252,0,223,435 }, +{ 19,283,436,53,297,26,813,432,27,128,42,25,390,503,122,736,73,123,605,63,389,529,630,250,690,65,381,444,6,269,108,216 }, +{ 33,202,13,128,494,0,51,141,269,1,4,2,102,180,15,137,65,95,6,450,77,40,117,59,457,36,196,817,134,86,49,515 }, +{ 18,16,60,68,101,167,191,118,35,121,9,212,55,0,126,1,24,647,199,146,520,107,628,621,363,71,21,28,346,92,806,727 }, +{ 352,230,217,531,160,93,36,669,748,854,689,258,137,871,728,699,752,251,574,202,373,351,228,120,717,260,144,219,268,82,816,1 }, +{ 15,515,700,753,13,23,0,51,8,1,4,82,165,77,110,33,10,180,5,202,11,22,120,12,9,197,115,93,403,141,40,351 }, +{ 0,9,101,217,35,88,352,100,39,175,30,68,562,752,13,317,252,115,180,197,64,1,83,141,65,213,165,230,194,36,28,265 }, +{ 32,64,5,470,288,90,21,147,0,95,356,22,20,519,835,312,819,18,247,182,11,97,13,4,387,49,43,298,316,48,107,7 }, +{ 16,14,24,1,7,2,35,0,5,50,18,69,46,12,58,75,9,67,70,163,21,54,38,48,223,502,281,37,140,60,28,10 }, +{ 18,265,9,252,39,195,354,411,1,16,132,101,121,682,167,203,5,363,146,593,35,333,21,271,60,13,100,0,156,327,7,520 }, +{ 4,13,1,115,141,23,2,180,5,0,51,3,217,202,77,15,33,269,341,318,317,21,165,515,11,10,197,365,9,137,64,352 }, +{ 
7,24,16,14,71,35,18,92,140,189,108,149,68,60,69,150,2,230,97,66,0,458,67,1,65,251,38,314,388,267,36,46 }, +{ 1,22,31,36,0,2,44,5,141,105,180,170,12,64,13,98,86,55,23,21,28,164,115,127,10,125,128,4,9,239,352,197 }, +{ 66,7,97,2,192,20,52,43,135,74,560,107,104,0,750,147,414,29,580,173,324,376,226,194,77,174,204,38,356,64,16,470 }, +{ 0,101,9,68,35,124,24,65,39,16,252,100,154,166,28,364,149,694,30,88,55,346,1,419,71,439,265,289,22,21,175,158 }, +{ 39,9,100,30,127,0,737,856,31,836,827,254,98,931,166,88,93,1,44,190,131,228,120,395,625,385,863,264,219,373,110,28 }, +{ 5,17,2,3,21,45,14,155,48,32,38,1,328,181,186,46,23,51,12,61,227,29,106,54,99,133,62,832,13,37,514,543 }, +{ 131,613,30,224,228,716,274,100,827,406,219,856,39,190,31,88,1,166,9,44,829,863,931,93,0,187,625,924,127,98,137,254 }, +{ 352,217,64,141,752,269,180,864,437,372,954,115,498,177,77,349,317,318,579,291,947,197,247,0,23,717,237,304,128,457,776,678 }, +{ 141,217,180,317,352,115,15,341,1,23,13,365,515,752,2,64,0,5,498,700,372,165,51,237,753,77,244,197,137,4,21,253 }, +{ 5,2,17,14,1,45,3,38,21,29,181,58,46,48,50,133,114,171,61,155,32,6,186,281,361,12,36,54,4,13,52,514 }, +{ 6,2,119,25,790,4,45,483,655,113,1,29,76,26,32,19,887,17,128,0,292,833,59,61,106,64,77,814,14,151,84,42 }, +{ 13,23,2,0,51,4,1,115,141,3,5,180,33,217,77,9,202,11,7,15,10,6,317,64,21,197,515,165,102,128,22,269 }, +{ 13,23,51,202,21,5,1,15,141,165,120,2,115,12,32,0,515,128,318,64,125,700,4,403,197,453,180,457,3,7,10,6 }, +{ 9,39,31,30,0,127,1,44,100,131,98,187,385,276,88,442,219,908,254,116,49,166,935,28,201,36,141,827,137,299,284,21 }, +{ 22,49,28,109,9,185,105,95,1,131,159,272,36,67,86,254,39,55,35,0,505,31,83,169,208,327,286,98,168,535,312,708 }, +{ 5,1,2,21,0,12,48,22,10,14,36,3,32,17,23,54,86,38,4,51,13,37,137,50,65,281,114,45,28,99,58,202 }, +{ 141,269,352,82,217,351,180,854,372,922,752,1,15,260,317,318,515,202,64,700,120,349,954,753,77,35,67,717,898,137,365,115 }, +{ 
144,203,613,418,326,406,96,669,137,679,1,228,494,822,840,317,36,83,855,160,817,859,856,816,217,831,345,93,876,77,44,251 }, +{ 15,13,515,23,700,753,51,1,33,0,202,21,2,5,180,141,120,165,217,82,12,117,4,352,269,197,115,32,3,9,134,260 }, +{ 13,23,115,1,0,51,4,77,2,33,15,141,10,5,341,180,515,3,217,202,9,365,317,64,700,102,11,165,197,22,753,7 }, +{ 531,943,373,160,728,93,206,260,261,559,964,269,717,535,332,384,365,295,110,533,141,10,180,352,244,137,120,55,959,564,36,253 }, +{ 1,31,36,170,22,55,44,10,86,64,127,0,2,98,301,164,740,338,237,143,5,125,116,13,242,141,299,180,23,169,105,12 }, +{ 13,23,141,77,51,4,64,32,33,202,115,269,102,128,21,0,177,180,318,90,40,10,7,5,137,15,217,352,9,291,59,22 }, +{ 202,120,260,318,77,15,450,269,1,82,33,23,141,13,51,515,351,128,700,5,64,326,137,21,102,110,753,494,93,523,817,165 }, +{ 523,899,102,33,730,15,23,403,719,117,153,13,515,51,475,4,700,5,453,817,196,753,494,40,202,120,1,2,450,457,17,421 }, +{ 202,403,494,450,120,817,523,475,318,453,33,402,128,77,13,51,260,576,342,102,15,23,515,4,700,5,82,753,326,210,137,615 }, +{ 2,29,112,66,7,52,70,151,58,87,135,5,74,226,307,6,14,186,1,45,549,172,644,25,113,287,46,155,334,64,294,97 }, +{ 1,77,349,291,260,120,652,102,5,39,64,269,9,33,340,342,13,98,888,698,23,296,100,318,51,202,87,137,638,128,50,850 }, +{ 1,13,15,2,0,4,23,515,5,141,180,3,700,341,115,51,753,269,77,901,197,352,217,33,21,11,365,6,165,202,7,317 }, +{ 7,14,16,2,46,5,70,58,1,38,24,35,92,163,0,75,21,18,50,54,140,12,87,220,155,69,171,23,60,9,13,307 }, +{ 2,29,66,226,135,7,87,74,52,278,6,75,222,220,294,70,97,1,145,25,172,262,324,38,69,112,331,92,5,14,140,26 }, +{ 15,515,700,753,33,77,117,4,1,102,134,40,153,11,13,196,217,21,5,51,23,115,32,3,2,202,141,137,128,291,48,177 }, +{ 15,217,82,515,351,141,317,1,13,700,260,77,110,120,115,854,23,753,180,51,21,36,137,922,5,64,365,352,291,202,93,341 }, +{ 31,190,30,373,120,110,863,88,44,127,908,856,260,318,82,98,93,187,836,717,935,39,442,131,141,254,228,219,1,968,77,116 }, +{ 
23,13,308,9,165,115,51,21,401,125,49,39,197,391,159,254,217,743,28,438,773,629,558,386,341,95,32,317,876,679,109,166 }, +{ 49,5,43,165,7,0,21,104,125,22,173,422,64,13,623,102,20,18,314,95,91,141,23,31,193,51,391,900,779,558,92,232 }, +{ 1,4,13,2,15,0,23,515,77,3,341,33,5,700,115,51,202,753,141,180,11,10,102,217,6,901,40,7,197,318,317,365 }, +{ 93,843,295,120,36,160,206,261,10,137,567,110,384,141,943,268,201,332,258,55,1,180,64,116,44,144,699,203,282,31,260,373 }, +{ 81,7,192,426,43,173,172,104,879,91,5,712,715,526,6,97,568,95,448,66,33,861,560,32,49,20,0,636,232,825,2,22 }, +{ 475,403,51,453,33,102,13,23,494,202,0,196,15,77,153,18,4,117,515,450,318,22,730,128,700,421,65,753,269,402,134,817 }, +{ 141,269,260,318,202,120,352,349,82,351,1,5,854,137,64,291,15,922,180,851,32,77,515,372,21,700,7,217,13,947,33,753 }, +{ 15,515,700,753,1,13,0,2,4,23,3,5,180,115,197,12,51,165,217,10,961,9,6,141,352,21,8,7,33,77,457,120 }, +{ 39,166,9,30,0,101,274,404,252,333,190,100,158,438,310,88,68,265,656,21,1,530,329,344,49,539,625,254,13,131,48,419 }, +{ 0,1,28,9,22,12,65,83,67,36,5,2,50,55,96,109,16,13,24,23,21,238,49,18,285,160,128,39,69,114,7,323 }, +{ 15,515,700,1,0,753,23,2,13,51,5,180,115,6,3,9,197,12,457,120,7,165,901,82,4,21,8,141,31,33,719,341 }, +{ 9,39,30,0,28,166,22,49,180,1,352,35,317,158,88,141,498,131,115,345,752,128,228,217,100,83,219,930,13,251,365,36 }, +{ 5,61,45,2,80,29,311,209,6,17,58,1,151,106,454,667,243,70,52,496,287,592,255,738,64,74,483,14,27,32,112,19 }, +{ 31,125,22,44,299,456,685,242,599,116,170,28,0,1,492,393,506,144,558,10,268,301,239,23,13,36,963,367,55,206,105,95 }, +{ 187,258,926,574,839,93,228,860,406,219,871,160,137,531,224,116,120,902,669,201,36,131,44,144,843,533,318,384,442,1,434,268 }, +{ 7,107,75,16,87,9,64,177,24,18,291,77,349,141,60,232,23,0,51,269,132,14,5,21,70,32,678,112,126,121,71,947 }, +{ 15,515,700,753,1,0,2,13,5,3,23,180,4,115,901,51,6,8,961,9,7,10,12,82,197,22,141,341,33,120,365,457 }, +{ 
13,23,51,1,5,202,2,12,15,21,165,141,0,115,3,4,32,515,197,10,180,318,128,120,64,700,6,7,403,269,457,137 }, +{ 1,2,0,77,64,3,141,13,33,15,23,10,6,102,5,515,180,4,117,7,700,165,11,217,269,40,753,115,128,17,197,134 }, +{ 345,531,332,269,260,317,717,752,373,351,180,352,728,82,10,365,160,533,217,143,498,251,244,93,341,901,36,1,141,898,55,864 }, +{ 16,7,33,189,92,77,388,60,140,35,102,24,14,1,230,21,150,117,733,314,18,915,71,13,108,134,5,64,69,2,98,22 }, +{ 142,202,234,178,5,786,77,49,70,0,416,450,639,878,1,48,21,929,147,259,315,455,198,120,12,481,163,113,846,329,318,22 }, +{ 81,715,192,0,173,712,681,104,636,91,74,20,750,370,7,718,95,879,22,43,825,560,422,64,207,49,172,18,397,10,426,319 }, +{ 13,23,15,51,515,0,700,753,4,1,961,2,10,115,457,11,33,453,3,5,9,40,12,197,165,77,401,475,64,102,22,569 }, +{ 64,297,5,445,95,61,250,311,80,34,17,312,45,2,86,472,58,14,180,53,22,151,869,738,247,237,29,1,128,165,21,288 }, +{ 16,24,18,71,7,35,118,92,14,154,60,68,0,149,28,302,124,150,55,175,2,9,97,1,429,20,108,273,22,65,43,126 }, +{ 195,360,156,771,132,163,626,687,591,371,883,146,121,846,70,586,379,13,293,98,407,48,761,296,354,18,31,1,55,49,21,105 }, +{ 202,13,77,23,318,33,51,0,4,141,5,21,217,32,291,102,64,128,15,10,9,494,269,137,515,403,1,31,117,700,120,317 }, +{ 7,192,97,81,172,66,426,173,43,715,712,232,861,879,104,330,568,298,74,893,885,526,387,825,92,140,91,14,636,6,5,448 }, +{ 2,1,14,6,67,7,65,69,24,36,66,124,108,83,38,29,22,86,0,18,484,5,28,46,12,10,25,302,150,16,650,74 }, +{ 33,77,13,202,102,4,0,23,128,51,141,64,318,22,403,269,137,10,15,40,494,117,32,59,11,153,1,21,177,196,515,115 }, +{ 269,141,318,77,349,291,217,202,33,15,372,304,515,22,102,177,351,700,352,120,5,137,10,317,260,753,64,851,854,403,49,21 }, +{ 6,74,66,85,138,25,87,42,135,26,226,222,280,29,75,500,220,278,792,70,19,2,1,294,204,64,32,145,853,112,52,174 }, +{ 9,0,105,39,16,18,1,101,272,31,127,98,24,518,333,252,310,28,68,737,846,371,158,916,938,49,30,7,286,35,301,455 }, +{ 
2,1,14,67,24,46,83,108,69,29,38,103,114,36,6,133,18,0,28,7,65,52,236,75,50,398,5,309,135,16,278,160 }, +{ 22,1,28,105,49,95,0,2,67,55,36,239,168,159,65,35,14,170,320,164,9,7,10,5,114,12,83,64,194,109,24,301 }, +{ 7,66,172,97,92,140,232,568,298,14,192,314,16,380,135,324,2,330,74,38,357,448,126,69,35,5,107,6,387,60,204,572 }, +{ 2,29,1,14,5,6,46,133,114,50,52,26,218,108,19,13,366,236,27,45,70,17,58,23,86,51,137,65,112,38,25,12 }, +{ 6,1,74,25,2,26,29,66,42,19,75,14,388,67,108,70,52,85,103,65,38,138,357,133,114,594,324,516,603,96,309,69 }, +{ 22,9,28,1,36,49,109,105,86,95,131,31,169,39,0,141,272,159,44,55,98,180,13,30,185,115,83,128,352,137,64,208 }, +{ 203,822,326,23,77,859,403,494,576,39,473,182,33,1,691,100,18,217,13,817,411,447,363,102,93,966,96,478,291,704,310,120 }, +{ 15,515,700,753,33,77,117,4,102,134,115,153,13,1,40,217,11,196,341,2,5,3,23,317,365,0,21,291,32,51,12,569 }, +{ 15,515,700,753,13,0,23,1,8,82,51,165,197,120,180,2,9,33,4,110,5,12,10,260,351,386,141,7,457,475,93,901 }, +{ 1,23,13,15,51,0,21,2,515,5,141,180,120,165,700,202,197,4,753,12,33,9,7,82,115,93,3,352,260,6,110,48 }, +{ 15,515,700,753,0,1,2,5,3,4,8,13,180,341,10,23,7,6,9,51,77,197,961,115,165,82,120,31,22,202,457,217 }, +{ 559,661,922,564,141,533,10,317,373,110,143,269,244,260,332,261,93,642,752,295,351,876,531,843,180,206,728,384,352,1,434,120 }, +{ 15,515,1,0,700,2,13,23,753,5,51,180,3,165,12,6,197,115,4,9,7,21,719,8,457,82,141,120,33,22,901,10 }, +{ 198,234,0,325,5,77,202,416,20,147,32,43,639,315,49,61,450,455,142,21,113,230,22,318,725,342,207,13,95,904,494,10 }, +{ 447,264,363,9,411,676,682,1,156,664,821,478,166,354,812,39,100,905,382,897,98,18,759,404,31,101,724,5,265,223,88,13 }, +{ 24,14,69,16,35,18,2,7,108,189,71,67,267,149,1,46,68,83,38,140,0,236,251,9,388,60,133,103,65,28,29,50 }, +{ 16,7,35,20,14,18,109,2,43,120,107,60,1,121,326,907,553,77,13,147,23,82,68,260,0,403,5,24,202,126,265,199 }, +{ 
30,131,187,276,31,44,613,442,39,9,190,228,1,839,116,935,908,219,127,88,244,224,110,137,93,201,98,141,36,567,0,856 }, +{ 98,223,1,393,812,265,100,421,593,834,697,48,51,410,791,382,21,88,31,284,9,125,96,293,230,23,213,217,656,689,541,5 }, +{ 98,51,127,219,616,258,105,293,395,421,924,512,31,308,23,201,116,44,301,272,763,276,125,13,453,170,401,295,261,944,115,567 }, +{ 253,110,951,352,811,206,332,180,141,244,282,10,854,417,642,638,559,752,143,911,260,55,93,533,499,498,661,120,351,959,564,341 }, +{ 49,9,159,254,272,158,0,131,28,39,627,105,327,286,22,518,688,578,68,347,374,101,224,424,95,35,219,24,16,364,65,344 }, +{ 105,22,131,272,98,286,327,109,374,239,28,95,320,219,9,224,55,127,187,36,578,169,64,185,538,1,159,10,371,634,49,616 }, +{ 691,478,340,1,658,914,724,363,744,698,156,772,411,296,682,447,9,284,335,98,264,303,909,21,354,410,225,13,664,686,88,919 }, +{ 16,24,35,18,71,7,140,108,189,267,92,60,14,230,68,69,9,1,149,46,246,191,388,167,2,0,118,236,133,21,674,5 }, +{ 9,0,127,31,98,371,395,39,737,49,1,44,385,272,512,28,293,242,836,761,254,299,101,16,187,22,116,158,159,131,18,21 }, +{ 16,18,68,0,60,35,9,101,252,28,118,24,419,55,7,109,604,71,39,121,22,364,14,158,191,167,925,126,329,21,92,49 }, +{ 116,268,203,93,206,692,551,31,417,940,499,8,473,44,202,523,959,0,120,137,559,22,450,403,576,10,728,299,13,326,51,1 }, +{ 225,459,744,1,919,914,691,330,622,21,141,223,5,284,934,335,88,538,340,82,385,839,363,120,478,98,48,30,64,32,686,166 }, +{ 5,1,2,0,14,36,21,281,12,48,50,67,22,28,54,83,24,218,38,10,181,9,32,18,65,58,45,114,430,17,99,37 }, +{ 137,450,202,704,120,260,326,318,968,269,851,403,291,77,23,141,182,310,494,373,351,457,82,890,349,110,60,128,817,678,105,96 }, +{ 15,515,700,753,4,33,13,23,77,5,40,11,102,93,1,21,110,51,82,117,141,2,10,8,32,64,120,31,202,3,217,115 }, +{ 15,13,1,23,515,0,51,2,700,5,753,21,180,141,165,3,12,115,197,4,7,6,457,9,352,202,33,8,719,120,77,341 }, +{ 
219,127,258,98,276,201,131,395,944,293,116,284,567,31,242,105,137,935,295,44,403,860,51,224,576,456,9,371,578,475,202,512 }, +{ 16,7,18,35,60,0,14,20,118,28,68,22,2,24,1,92,158,107,5,49,154,126,109,12,43,10,55,6,677,71,21,168 }, +{ 93,728,531,160,559,373,574,120,295,860,533,269,717,260,926,902,258,318,36,201,261,434,851,137,617,141,187,352,843,384,332,251 }, +{ 28,0,1,9,22,109,83,39,49,12,36,67,55,5,96,2,128,30,158,69,21,23,160,208,35,13,65,323,50,141,194,238 }, +{ 23,13,51,1,5,15,141,21,0,2,165,515,202,700,12,197,180,120,32,115,4,753,64,9,7,269,6,3,125,386,48,453 }, +{ 5,21,13,49,14,20,7,23,43,32,1,0,652,48,713,22,38,2,16,132,955,107,12,279,24,888,197,640,70,303,18,638 }, +{ 9,0,28,39,1,30,35,101,22,67,83,141,49,175,36,68,55,88,13,251,10,69,23,158,180,115,64,100,217,65,345,166 }, +{ 260,120,82,269,5,450,351,1,202,141,854,13,77,922,32,33,137,4,23,125,291,21,15,515,165,349,177,700,318,326,180,753 }, +{ 121,16,18,35,363,101,60,20,107,14,68,259,621,55,604,43,7,252,9,364,126,0,167,191,5,407,132,28,199,419,146,10 }, +{ 13,23,51,1,15,0,2,141,5,515,12,21,700,115,165,180,4,753,3,197,202,32,9,120,7,8,6,11,37,10,457,269 }, +{ 201,144,206,443,418,203,435,96,335,459,187,1,88,332,330,321,269,934,30,372,822,521,268,326,44,523,382,141,410,264,494,473 }, +{ 31,44,276,201,116,131,284,662,567,144,9,489,98,295,268,434,0,30,137,39,93,1,187,22,219,918,110,299,141,36,224,384 }, +{ 520,478,664,1,264,604,9,167,777,759,411,0,806,724,48,21,101,68,647,936,363,223,118,682,410,18,100,16,252,98,265,13 }, +{ 23,70,21,87,60,75,120,182,163,379,92,18,7,937,71,121,446,132,24,98,931,126,107,77,795,195,115,44,411,146,51,850 }, +{ 39,9,0,737,127,31,846,98,1,827,105,310,371,30,254,100,44,18,395,242,272,101,385,916,836,16,265,131,938,93,166,557 }, +{ 279,20,43,126,107,7,92,16,356,362,60,595,246,359,598,35,0,683,939,653,121,97,125,441,399,392,150,199,48,230,14,649 }, +{ 206,417,93,959,499,728,8,559,120,473,137,141,10,564,31,260,44,450,203,341,253,244,373,116,143,638,268,180,352,110,318,940 }, +{ 
259,465,147,132,590,687,534,199,581,146,941,427,107,640,279,178,121,5,195,150,522,955,198,35,786,929,798,142,1,21,325,626 }, +{ 523,15,120,450,202,515,403,51,817,700,13,753,23,457,33,899,128,64,730,102,494,342,115,719,453,196,49,99,318,421,308,5 }, +{ 141,559,10,244,365,564,661,180,253,143,752,110,55,317,533,341,901,93,373,206,535,160,82,922,260,36,531,964,352,332,261,197 }, +{ 219,258,98,127,276,964,943,137,843,535,201,935,131,860,261,295,284,567,206,44,116,31,253,492,203,332,160,615,36,93,55,692 }, +{ 0,319,422,207,945,693,577,887,32,804,95,344,104,904,61,20,5,43,7,725,113,510,306,102,49,263,153,426,33,83,22,9 }, +{ 1,22,0,5,12,2,36,21,28,86,49,105,9,10,23,13,141,95,31,55,128,37,51,4,83,202,3,64,96,7,32,44 }, +{ 15,515,13,700,1,0,753,2,23,3,4,5,51,10,115,197,6,33,12,9,165,7,8,77,11,961,180,269,141,22,120,457 }, +{ 15,1,23,120,77,13,515,51,141,202,700,180,110,137,260,753,326,5,128,102,0,21,2,165,269,33,197,450,318,217,93,115 }, +{ 15,515,1,700,0,753,13,2,23,180,51,5,120,4,9,115,197,12,7,165,21,33,6,82,3,8,523,901,31,141,457,260 }, +{ 16,18,24,7,92,35,60,75,9,13,71,14,0,108,50,21,126,121,1,140,23,5,132,146,2,12,128,10,64,141,70,87 }, +{ 180,341,901,15,515,1,365,700,0,2,197,753,115,4,10,13,752,5,3,8,165,317,141,23,143,873,44,31,569,55,93,6 }, +{ 9,0,175,35,101,28,39,67,68,1,65,83,30,69,364,336,22,114,55,124,194,158,100,289,252,166,64,345,103,36,50,88 }, +{ 64,165,180,197,115,247,217,237,21,13,32,316,22,141,352,72,288,304,95,225,76,391,386,16,468,90,49,35,365,640,372,23 }, +{ 15,515,700,13,753,1,0,2,23,4,5,3,115,51,141,197,12,10,180,961,7,9,21,33,217,6,8,165,457,11,77,341 }, +{ 132,121,199,146,60,279,493,640,407,598,126,195,534,581,955,590,107,5,150,35,522,49,259,16,18,360,156,0,147,362,21,167 }, +{ 0,1,28,9,22,5,36,12,65,24,67,96,2,83,18,50,114,55,21,16,7,10,23,14,13,160,137,51,48,218,103,69 }, +{ 93,120,957,77,30,968,459,110,137,160,613,102,202,352,373,141,31,372,217,330,190,318,269,260,203,44,28,473,228,177,863,704 }, +{ 
15,515,700,0,753,1,13,23,2,51,5,9,120,82,4,7,901,197,10,8,260,180,341,12,33,6,3,523,165,102,115,141 }, +{ 206,417,8,141,499,44,244,93,31,10,137,253,559,116,728,144,120,564,269,638,203,352,143,260,341,752,268,717,951,180,160,110 }, +{ 530,254,228,1,96,21,406,39,827,31,669,840,613,829,137,679,166,98,23,51,960,438,131,93,48,224,219,317,310,36,876,190 }, +{ 15,515,700,753,13,457,0,197,719,1,165,82,23,8,120,730,2,10,12,180,134,5,9,141,260,4,351,51,115,3,341,899 }, +{ 0,16,68,9,24,28,18,35,252,109,39,419,124,158,154,55,101,71,22,118,60,7,49,65,333,14,1,10,329,364,677,346 }, +{ 1,15,13,23,515,51,120,0,700,180,2,165,5,753,141,197,21,33,202,102,260,4,9,12,7,326,137,450,115,6,82,110 }, +{ 535,253,352,564,110,365,82,180,341,10,854,533,55,898,244,901,873,141,752,143,642,559,498,317,36,951,115,964,638,282,661,197 }, +{ 31,44,125,338,116,64,242,36,1,10,55,22,456,237,180,13,299,164,506,86,23,165,558,143,0,762,492,479,844,546,93,8 }, +{ 13,23,4,1,202,2,0,51,115,77,141,180,5,15,217,3,33,11,515,317,9,10,102,21,700,341,365,318,269,64,32,128 }, +{ 9,39,0,166,68,101,28,364,30,158,562,35,175,65,333,154,49,404,706,124,21,252,274,168,190,289,100,570,16,1,310,346 }, +{ 15,515,700,753,341,13,0,23,1,33,141,4,260,82,77,51,351,180,9,5,115,137,10,217,11,120,102,40,349,269,202,854 } diff --git a/libkram/bc7enc/utils.cpp b/libkram/bc7enc/utils.cpp new file mode 100644 index 00000000..37e24f70 --- /dev/null +++ b/libkram/bc7enc/utils.cpp @@ -0,0 +1,909 @@ +// File: utils.cpp +#include "utils.h" + +// Don't need the impl yet +#if 0 + +#include "lodepng.h" +#include "miniz.h" + + +namespace utils +{ +using namespace STL_NAMESPACE; + +#define FLOOD_PUSH(y, xl, xr, dy) if (((y + (dy)) >= 0) && ((y + (dy)) < (int)m_height)) { stack.push_back(fill_segment(y, xl, xr, dy)); } + +// See http://www.realtimerendering.com/resources/GraphicsGems/gems/SeedFill.c +uint32_t image_u8::flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, vector* pSet_pixels) +{ + uint32_t total_set = 0; + + if 
(!flood_fill_is_inside(x, y, b)) + return 0; + + vector stack; + stack.reserve(64); + + FLOOD_PUSH(y, x, x, 1); + FLOOD_PUSH(y + 1, x, x, -1); + + while (stack.size()) + { + fill_segment s = stack.back(); + stack.pop_back(); + + int x1 = s.m_xl, x2 = s.m_xr, dy = s.m_dy; + y = s.m_y + s.m_dy; + + for (x = x1; (x >= 0) && flood_fill_is_inside(x, y, b); x--) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + + int l; + + if (x >= x1) + goto skip; + + l = x + 1; + if (l < x1) + FLOOD_PUSH(y, l, x1 - 1, -dy); + + x = x1 + 1; + + do + { + for (; x <= ((int)m_width - 1) && flood_fill_is_inside(x, y, b); x++) + { + (*this)(x, y) = c; + total_set++; + if (pSet_pixels) + pSet_pixels->push_back(pixel_coord(x, y)); + } + FLOOD_PUSH(y, l, x - 1, dy); + + if (x > (x2 + 1)) + FLOOD_PUSH(y, x2 + 1, x - 1, -dy); + + skip: + for (x++; x <= x2 && !flood_fill_is_inside(x, y, b); x++) + ; + + l = x; + } while (x <= x2); + } + + return total_set; +} + +void image_u8::draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color) +{ + if (xs > xe) + { + std::swap(xs, xe); + std::swap(ys, ye); + } + + int dx = xe - xs, dy = ye - ys; + if (!dx) + { + if (ys > ye) + std::swap(ys, ye); + for (int i = ys; i <= ye; i++) + set_pixel_clipped(xs, i, color); + } + else if (!dy) + { + for (int i = xs; i < xe; i++) + set_pixel_clipped(i, ys, color); + } + else if (dy > 0) + { + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); + rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); + } + } + else + { + dy = -dy; + if (dy <= dx) + { + int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); + 
rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); + } + } +} + +void image_u8::rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color) +{ + int start, end, var; + + if (pred) + { + start = ys; + end = ye; + var = xs; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(var, i, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } + else + { + start = xs; + end = xe; + var = ys; + for (int i = start; i <= end; i++) + { + set_pixel_clipped(i, var, color); + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } +} + +bool load_png(const char* pFilename, image_u8& img) +{ + img.clear(); + + vector pixels; + unsigned int w = 0, h = 0; + unsigned int e = lodepng::decode(pixels, w, h, pFilename); + if (e != 0) + { + fprintf(stderr, "Failed loading PNG file %s\n", pFilename); + return false; + } + + img.init(w, h); + memcpy(&img.get_pixels()[0], &pixels[0], w * h * sizeof(uint32_t)); + + return true; +} + +bool save_png(const char* pFilename, const image_u8& img, bool save_alpha) +{ + const uint32_t w = img.width(); + const uint32_t h = img.height(); + + vector pixels; + if (save_alpha) + { + pixels.resize(w * h * sizeof(color_quad_u8)); + memcpy(&pixels[0], &img.get_pixels()[0], w * h * sizeof(color_quad_u8)); + } + else + { + pixels.resize(w * h * 3); + unsigned char* pDst = &pixels[0]; + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++, pDst += 3) + pDst[0] = img(x, y)[0], pDst[1] = img(x, y)[1], pDst[2] = img(x, y)[2]; + } + + return lodepng::encode(pFilename, pixels, w, h, save_alpha ? 
LCT_RGBA : LCT_RGB) == 0; +} + +static float gauss(int x, int y, float sigma_sqr) +{ + float pow = expf(-((x * x + y * y) / (2.0f * sigma_sqr))); + float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; + return g; +} + +// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) +{ + assert(size_x & size_y & 1); + + if (!(size_x | size_y)) + return; + + int mid_x = size_x / 2; + int mid_y = size_y / 2; + + double sum = 0; + for (int x = 0; x < size_x; x++) + { + for (int y = 0; y < size_y; y++) + { + float g; + if ((x > mid_x) && (y < mid_y)) + g = pDst[(size_x - x - 1) + y * size_x]; + else if ((x < mid_x) && (y > mid_y)) + g = pDst[x + (size_y - y - 1) * size_x]; + else if ((x > mid_x) && (y > mid_y)) + g = pDst[(size_x - x - 1) + (size_y - y - 1) * size_x]; + else + g = gauss(x - mid_x, y - mid_y, sigma_sqr); + + pDst[x + y * size_x] = g; + sum += g; + } + } + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + { + sum = pDst[mid_x + mid_y * size_x]; + } + + if (flags & (cComputeGaussianFlagNormalizeCenterToOne | cComputeGaussianFlagNormalize)) + { + double one_over_sum = 1.0f / sum; + for (int i = 0; i < size_x * size_y; i++) + pDst[i] = static_cast(pDst[i] * one_over_sum); + + if (flags & cComputeGaussianFlagNormalizeCenterToOne) + pDst[mid_x + mid_y * size_x] = 1.0f; + } + + if (flags & cComputeGaussianFlagPrint) + { + printf("{\n"); + for (int y = 0; y < size_y; y++) + { + printf(" "); + for (int x = 0; x < size_x; x++) + { + printf("%f, ", pDst[x + y * size_x]); + } + printf("\n"); + } + printf("}"); + } +} + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor) +{ + assert(odd_filter_width && (odd_filter_width & 1)); + odd_filter_width |= 1; + + vector kernel(odd_filter_width * odd_filter_width); + compute_gaussian_kernel(&kernel[0], odd_filter_width, 
odd_filter_width, sigma_sqr, cComputeGaussianFlagNormalize); + + const int dst_width = orig_img.get_width() / width_divisor; + const int dst_height = orig_img.get_height() / height_divisor; + + const int H = odd_filter_width / 2; + const int L = -H; + + dst.crop(dst_width, dst_height); + +#pragma omp parallel for + for (int oy = 0; oy < dst_height; oy++) + { + for (int ox = 0; ox < dst_width; ox++) + { + vec4F c(0.0f); + + for (int yd = L; yd <= H; yd++) + { + int y = oy * height_divisor + (height_divisor >> 1) + yd; + + for (int xd = L; xd <= H; xd++) + { + int x = ox * width_divisor + (width_divisor >> 1) + xd; + + const vec4F& p = orig_img.get_clamped_or_wrapped(x, y, wrapping, wrapping); + + float w = kernel[(xd + H) + (yd + H) * odd_filter_width]; + c[0] += p[0] * w; + c[1] += p[1] * w; + c[2] += p[2] * w; + c[3] += p[3] * w; + } + } + + dst(ox, oy).set(c[0], c[1], c[2], c[3]); + } + } +} + +static void pow_image(const imagef& src, imagef& dst, const vec4F& power) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + if ((power[0] == 2.0f) && (power[1] == 2.0f) && (power[2] == 2.0f) && (power[3] == 2.0f)) + dst(x, y).set(p[0] * p[0], p[1] * p[1], p[2] * p[2], p[3] * p[3]); + else + dst(x, y).set(powf(p[0], power[0]), powf(p[1], power[1]), powf(p[2], power[2]), powf(p[3], power[3])); + } + } +} + +#if 0 +static void mul_image(const imagef& src, imagef& dst, const vec4F& mul) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + dst(x, y).set(p[0] * mul[0], p[1] * mul[1], p[2] * mul[2], p[3] * mul[3]); + } + } +} +#endif + +static void scale_image(const imagef& src, imagef& dst, const vec4F& scale, const vec4F& shift) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < 
(int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + d[c] = scale[c] * p[c] + shift[c]; + + dst(x, y).set(d[0], d[1], d[2], d[3]); + } + } +} + +static void add_weighted_image(const imagef& src1, const vec4F& alpha, const imagef& src2, const vec4F& beta, const vec4F& gamma, imagef& dst) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set( + s1[0] * alpha[0] + s2[0] * beta[0] + gamma[0], + s1[1] * alpha[1] + s2[1] * beta[1] + gamma[1], + s1[2] * alpha[2] + s2[2] * beta[2] + gamma[2], + s1[3] * alpha[3] + s2[3] * beta[3] + gamma[3]); + } + } +} + +static void add_image(const imagef& src1, const imagef& src2, imagef& dst) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + dst(x, y).set(s1[0] + s2[0], s1[1] + s2[1], s1[2] + s2[2], s1[3] + s2[3]); + } + } +} + +static void adds_image(const imagef& src, const vec4F& value, imagef& dst) +{ + dst.resize(src); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& p = src(x, y); + + dst(x, y).set(p[0] + value[0], p[1] + value[1], p[2] + value[2], p[3] + value[3]); + } + } +} + +static void mul_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v1 = s1[c]; + 
float v2 = s2[c]; + d[c] = v1 * v2 * scale[c]; + } + + dst(x, y) = d; + } + } +} + +static void div_image(const imagef& src1, const imagef& src2, imagef& dst, const vec4F& scale) +{ + dst.resize(src1); + +#pragma omp parallel for + for (int y = 0; y < (int)dst.get_height(); y++) + { + for (uint32_t x = 0; x < dst.get_width(); x++) + { + const vec4F& s1 = src1(x, y); + const vec4F& s2 = src2(x, y); + + vec4F d; + + for (uint32_t c = 0; c < 4; c++) + { + float v = s2[c]; + if (v == 0.0f) + d[c] = 0.0f; + else + d[c] = (s1[c] * scale[c]) / v; + } + + dst(x, y) = d; + } + } +} + +static vec4F avg_image(const imagef& src) +{ + vec4F avg(0.0f); + + for (uint32_t y = 0; y < src.get_height(); y++) + { + for (uint32_t x = 0; x < src.get_width(); x++) + { + const vec4F& s = src(x, y); + + avg += vec4F(s[0], s[1], s[2], s[3]); + } + } + + avg /= static_cast(src.get_total_pixels()); + + return avg; +} + +// Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html +vec4F compute_ssim(const imagef& a, const imagef& b) +{ + imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; + + const float C1 = 6.50250f, C2 = 58.52250f; + + pow_image(a, a_sq, vec4F(2)); + pow_image(b, b_sq, vec4F(2)); + mul_image(a, b, axb, vec4F(1.0f)); + + gaussian_filter(mu1, a, 11, 1.5f * 1.5f); + gaussian_filter(mu2, b, 11, 1.5f * 1.5f); + + pow_image(mu1, mu1_sq, vec4F(2)); + pow_image(mu2, mu2_sq, vec4F(2)); + mul_image(mu1, mu2, mu1_mu2, vec4F(1.0f)); + + gaussian_filter(s1_sq, a_sq, 11, 1.5f * 1.5f); + add_weighted_image(s1_sq, vec4F(1), mu1_sq, vec4F(-1), vec4F(0), s1_sq); + + gaussian_filter(s2_sq, b_sq, 11, 1.5f * 1.5f); + add_weighted_image(s2_sq, vec4F(1), mu2_sq, vec4F(-1), vec4F(0), s2_sq); + + gaussian_filter(s12, axb, 11, 1.5f * 1.5f); + add_weighted_image(s12, vec4F(1), mu1_mu2, vec4F(-1), vec4F(0), s12); + + scale_image(mu1_mu2, t1, vec4F(2), vec4F(0)); + adds_image(t1, vec4F(C1), t1); + + scale_image(s12, t2, vec4F(2), vec4F(0)); + 
adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t3, vec4F(1)); + + add_image(mu1_sq, mu2_sq, t1); + adds_image(t1, vec4F(C1), t1); + + add_image(s1_sq, s2_sq, t2); + adds_image(t2, vec4F(C2), t2); + + mul_image(t1, t2, t1, vec4F(1)); + + div_image(t3, t1, smap, vec4F(1)); + + return avg_image(smap); +} + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma) +{ + image_u8 ta(a), tb(b); + + if ((ta.width() != tb.width()) || (ta.height() != tb.height())) + { + fprintf(stderr, "compute_ssim: Cropping input images to equal dimensions\n"); + + const uint32_t w = std::min(a.width(), b.width()); + const uint32_t h = std::min(a.height(), b.height()); + ta.crop(w, h); + tb.crop(w, h); + } + + if (!ta.width() || !ta.height()) + { + assert(0); + return vec4F(0); + } + + if (luma) + { + for (uint32_t y = 0; y < ta.height(); y++) + { + for (uint32_t x = 0; x < ta.width(); x++) + { + ta(x, y).set((uint8_t)ta(x, y).get_luma(), ta(x, y).a); + tb(x, y).set((uint8_t)tb(x, y).get_luma(), tb(x, y).a); + } + } + } + + imagef fta, ftb; + + fta.set(ta); + ftb.set(tb); + + return compute_ssim(fta, ftb); +} + +bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header) +{ + (void)srgb; + + FILE* pFile = NULL; +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + if (!pFile) + { + fprintf(stderr, "Failed creating file %s!\n", pFilename); + return false; + } + + fwrite("DDS ", 4, 1, pFile); + + DDSURFACEDESC2 desc; + memset(&desc, 0, sizeof(desc)); + + desc.dwSize = sizeof(desc); + desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; + + desc.dwWidth = width; + desc.dwHeight = height; + + desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat); + + desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC; + + desc.lPitch = (((desc.dwWidth + 3) & ~3) * 
((desc.dwHeight + 3) & ~3) * pixel_format_bpp) >> 3; + desc.dwFlags |= DDSD_LINEARSIZE; + + desc.ddpfPixelFormat.dwRGBBitCount = 0; + + if ((!force_dx10_header) && + ((dxgi_format == DXGI_FORMAT_BC1_UNORM) || + (dxgi_format == DXGI_FORMAT_BC3_UNORM) || + (dxgi_format == DXGI_FORMAT_BC4_UNORM) || + (dxgi_format == DXGI_FORMAT_BC5_UNORM))) + { + if (dxgi_format == DXGI_FORMAT_BC1_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '1'); + else if (dxgi_format == DXGI_FORMAT_BC3_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', 'T', '5'); + else if (dxgi_format == DXGI_FORMAT_BC4_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '1'); + else if (dxgi_format == DXGI_FORMAT_BC5_UNORM) + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('A', 'T', 'I', '2'); + + fwrite(&desc, sizeof(desc), 1, pFile); + } + else + { + desc.ddpfPixelFormat.dwFourCC = (uint32_t)PIXEL_FMT_FOURCC('D', 'X', '1', '0'); + + fwrite(&desc, sizeof(desc), 1, pFile); + + DDS_HEADER_DXT10 hdr10; + memset(&hdr10, 0, sizeof(hdr10)); + + // Not all tools support DXGI_FORMAT_BC7_UNORM_SRGB (like NVTT), but ddsview in DirectXTex pays attention to it. So not sure what to do here. + // For best compatibility just write DXGI_FORMAT_BC7_UNORM. + //hdr10.dxgiFormat = srgb ? 
DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM; + hdr10.dxgiFormat = dxgi_format; // DXGI_FORMAT_BC7_UNORM; + hdr10.resourceDimension = D3D10_RESOURCE_DIMENSION_TEXTURE2D; + hdr10.arraySize = 1; + + fwrite(&hdr10, sizeof(hdr10), 1, pFile); + } + + fwrite(pBlocks, desc.lPitch, 1, pFile); + + if (fclose(pFile) == EOF) + { + fprintf(stderr, "Failed writing to DDS file %s!\n", pFilename); + return false; + } + + return true; +} + +void strip_extension(string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if (s[i] == '.') + { + s.resize(i); + break; + } + } +} + +void strip_path(string& s) +{ + for (int32_t i = (int32_t)s.size() - 1; i >= 0; i--) + { + if ((s[i] == '/') || (s[i] == ':') || (s[i] == '\\')) + { + s.erase(0, i + 1); + break; + } + } +} + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len) +{ + if (!pBuf || !len) + return 0; + + uint32_t h = static_cast(len); + + const uint32_t bytes_left = len & 3; + len >>= 2; + + while (len--) + { + const uint16_t* pWords = reinterpret_cast(pBuf); + + h += pWords[0]; + + const uint32_t t = (pWords[1] << 11) ^ h; + h = (h << 16) ^ t; + + pBuf += sizeof(uint32_t); + + h += h >> 11; + } + + switch (bytes_left) + { + case 1: + h += *reinterpret_cast(pBuf); + h ^= h << 10; + h += h >> 1; + break; + case 2: + h += *reinterpret_cast(pBuf); + h ^= h << 11; + h += h >> 17; + break; + case 3: + h += *reinterpret_cast(pBuf); + h ^= h << 16; + h ^= (static_cast(pBuf[sizeof(uint16_t)])) << 18; + h += h >> 11; + break; + default: + break; + } + + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps) +{ + tracked_stat comp_stats[4]; + + for (uint32_t y = 0; y < block_height; y++) + { + for (uint32_t x = 0; x < block_width; x++) + { + const color_quad_u8* pPixel = pPixels + x + y * block_width; + + for (uint32_t c = 0; c < 
num_comps; c++) + comp_stats[c].update(pPixel->m_c[c]); + } + } + + float max_std_dev = 0.0f; + for (uint32_t i = 0; i < num_comps; i++) + max_std_dev = std::max(max_std_dev, comp_stats[i].get_std_dev()); + return max_std_dev; +} + +const uint32_t ASTC_SIG = 0x5CA1AB13; + +#pragma pack(push, 1) +struct astc_header +{ + uint32_t m_sig; + uint8_t m_block_x; + uint8_t m_block_y; + uint8_t m_block_z; + uint8_t m_width[3]; + uint8_t m_height[3]; + uint8_t m_depth[3]; +}; +#pragma pack(pop) + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "wb"); +#else + pFile = fopen(pFilename, "wb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + memset(&hdr, 0, sizeof(hdr)); + + hdr.m_sig = ASTC_SIG; + hdr.m_block_x = (uint8_t)block_width; + hdr.m_block_y = (uint8_t)block_height; + hdr.m_block_z = 1; + hdr.m_width[0] = (uint8_t)(width); + hdr.m_width[1] = (uint8_t)(width >> 8); + hdr.m_width[2] = (uint8_t)(width >> 16); + hdr.m_height[0] = (uint8_t)(height); + hdr.m_height[1] = (uint8_t)(height >> 8); + hdr.m_height[2] = (uint8_t)(height >> 16); + hdr.m_depth[0] = 1; + fwrite(&hdr, sizeof(hdr), 1, pFile); + + fwrite(blocks.data(), 16, blocks.size(), pFile); + if (fclose(pFile) == EOF) + return false; + + return true; +} + +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height) +{ + FILE* pFile = nullptr; + +#ifdef _MSC_VER + fopen_s(&pFile, pFilename, "rb"); +#else + pFile = fopen(pFilename, "rb"); +#endif + + if (!pFile) + return false; + + astc_header hdr; + if (fread(&hdr, sizeof(hdr), 1, pFile) != 1) + { + fclose(pFile); + return false; + } + + if (hdr.m_sig != ASTC_SIG) + { + fclose(pFile); + return false; + } + + width = hdr.m_width[0] + (hdr.m_width[1] << 8) + (hdr.m_width[2] << 16); + height = 
hdr.m_height[0] + (hdr.m_height[1] << 8) + (hdr.m_height[2] << 16); + uint32_t depth = hdr.m_depth[0] + (hdr.m_depth[1] << 8) + (hdr.m_depth[2] << 16); + + if ((width < 1) || (width > 32768) || (height < 1) || (height > 32768)) + return false; + if ((hdr.m_block_z != 1) || (depth != 1)) + return false; + + block_width = hdr.m_block_x; + block_height = hdr.m_block_y; + + if ((block_width < 4) || (block_width > 12) || (block_height < 4) || (block_height > 12)) + return false; + + uint32_t blocks_x = (width + block_width - 1) / block_width; + uint32_t blocks_y = (height + block_height - 1) / block_height; + uint32_t total_blocks = blocks_x * blocks_y; + + blocks.resize(total_blocks); + + if (fread(blocks.data(), 16, total_blocks, pFile) != total_blocks) + { + fclose(pFile); + return false; + } + + fclose(pFile); + return true; +} + +uint32_t get_deflate_size(const void* pData, size_t data_size) +{ + size_t comp_size = 0; + void* pPre_RDO_Comp_data = tdefl_compress_mem_to_heap(pData, data_size, &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); + mz_free(pPre_RDO_Comp_data); + + if (comp_size > UINT32_MAX) + return UINT32_MAX; + + return (uint32_t)comp_size; +} + +} // namespace utils + +#endif diff --git a/libkram/bc7enc/utils.h b/libkram/bc7enc/utils.h new file mode 100644 index 00000000..60c574b2 --- /dev/null +++ b/libkram/bc7enc/utils.h @@ -0,0 +1,2622 @@ +// File: utils.h +#pragma once +#ifdef _MSC_VER +#pragma warning (push) +#pragma warning (disable:4127) // conditional expression is constant +#endif + +#include +#include +#include +#include +#include +#include +//#include +//#include +//#include + +// on macOS, random pulls in std::string w/5x impls instanciated +#include + +#include +#include +//#include "dds_defs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define ASSUME(c) static_assert(c, #c) +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) + +#define VECTOR_TEXT_LINE_SIZE (30.0f) +#define VECTOR_TEXT_CORE_LINE_SIZE 
(21.0f) + +#define UNUSED(x) (void)x + +namespace utils +{ +using namespace STL_NAMESPACE; + +extern const uint32_t g_pretty_colors[]; +extern const uint32_t g_num_pretty_colors; + +const float cDegToRad = 0.01745329252f; +const float cRadToDeg = 57.29577951f; + +enum eClear { cClear }; +enum eZero { cZero }; +enum eInitExpand { cInitExpand }; + +inline int iabs(int i) { if (i < 0) i = -i; return i; } +inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } +template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } +template inline F square(F a) { return a * a; } + +template +inline T prev_wrap(T i, T n) +{ + T temp = i - 1; + if (temp < 0) + temp = n - 1; + return temp; +} + +template +inline T next_wrap(T i, T n) +{ + T temp = i + 1; + if (temp >= n) + temp = 0; + return temp; +} + +inline int posmod(int x, int y) +{ + if (x >= 0) + return (x < y) ? x : (x % y); + int m = (-x) % y; + return (m != 0) ? 
(y - m) : m; +} + +inline float deg_to_rad(float f) +{ + return f * cDegToRad; +}; + +inline float rad_to_deg(float f) +{ + return f * cRadToDeg; +}; + +template +struct rel_ops +{ + friend bool operator!=(const T& x, const T& y) + { + return (!(x == y)); + } + friend bool operator>(const T& x, const T& y) + { + return (y < x); + } + friend bool operator<=(const T& x, const T& y) + { + return (!(y < x)); + } + friend bool operator>=(const T& x, const T& y) + { + return (!(x < y)); + } +}; + +template +class vec : public rel_ops > +{ +public: + typedef T scalar_type; + enum + { + num_elements = N + }; + + inline vec() + { + } + + inline vec(eClear) + { + clear(); + } + + inline vec(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + explicit inline vec(T val) + { + set(val); + } + + inline vec(T val0, T val1) + { + set(val0, val1); + } + + inline vec(T val0, T val1, T val2) + { + set(val0, val1, val2); + } + + inline vec(T val0, T val1, T val2, T val3) + { + set(val0, val1, val2, val3); + } + + inline vec(T val0, T val1, T val2, T val3, T val4, T val5) + { + set(val0, val1, val2, val3, val4, val5); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15); + } + + inline vec( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19); + } + + inline vec( + T val0, T val1, T val2, T val3, + T 
val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + set(val0, val1, val2, val3, + val4, val5, val6, val7, + val8, val9, val10, val11, + val12, val13, val14, val15, + val16, val17, val18, val19, + val20, val21, val22, val23, + val24); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template + inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint32_t m = std::min(N, ON); + uint32_t i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for (; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint32_t index, T val) + { + assert(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set(T val) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + inline vec& set(T val0, T val1) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + for (uint32_t i = 2; i < N; i++) + m_s[i] = 0; + } + return *this; + } + + inline vec& set(T val0, T val1, T val2) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + for (uint32_t i = 3; i < N; i++) + m_s[i] = 0; + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + for (uint32_t i = 4; i < N; i++) + m_s[i] = 0; + } + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3, T val4, T val5) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + if (N >= 5) + { + m_s[4] = val4; + + if (N >= 6) + { + m_s[5] = val5; + + for (uint32_t i = 6; i < N; i++) + m_s[i] = 0; + } + } + } + 
} + } + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + for (uint32_t i = 16; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; + if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + for (uint32_t i = 20; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set( + T val0, T val1, T val2, T val3, + T val4, T val5, T val6, T val7, + T val8, T val9, T val10, T val11, + T val12, T val13, T val14, T val15, + T val16, T val17, T val18, T val19, + T val20, T val21, T val22, T val23, + T val24) + { + m_s[0] = val0; + if (N >= 2) + m_s[1] = val1; 
+ if (N >= 3) + m_s[2] = val2; + if (N >= 4) + m_s[3] = val3; + + if (N >= 5) + m_s[4] = val4; + if (N >= 6) + m_s[5] = val5; + if (N >= 7) + m_s[6] = val6; + if (N >= 8) + m_s[7] = val7; + + if (N >= 9) + m_s[8] = val8; + if (N >= 10) + m_s[9] = val9; + if (N >= 11) + m_s[10] = val10; + if (N >= 12) + m_s[11] = val11; + + if (N >= 13) + m_s[12] = val12; + if (N >= 14) + m_s[13] = val13; + if (N >= 15) + m_s[14] = val14; + if (N >= 16) + m_s[15] = val15; + + if (N >= 17) + m_s[16] = val16; + if (N >= 18) + m_s[17] = val17; + if (N >= 19) + m_s[18] = val18; + if (N >= 20) + m_s[19] = val19; + + if (N >= 21) + m_s[20] = val20; + if (N >= 22) + m_s[21] = val21; + if (N >= 23) + m_s[22] = val22; + if (N >= 24) + m_s[23] = val23; + + if (N >= 25) + m_s[24] = val24; + + for (uint32_t i = 25; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i) + { + return set(static_cast(other[i])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template + inline vec& swizzle_set(const vec& other, uint32_t i, uint32_t j, uint32_t k, uint32_t l) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator=(const vec& rhs) + { + if (this != &rhs) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator=(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint32_t s = std::min(N, O); + + uint32_t i; + for (i = 0; i < s; i++) + m_s[i] = 
static_cast(other[i]); + + for (; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator==(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator<(const vec& rhs) const + { + for (uint32_t i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + inline T operator[](uint32_t i) const + { + assert(i < N); + return m_s[i]; + } + + inline T& operator[](uint32_t i) + { + assert(i < N); + return m_s[i]; + } + + template + inline uint64_t get_component_as_uint() const + { + ASSUME(index < N); + if (sizeof(T) == sizeof(float)) + return *reinterpret_cast(&m_s[index]); + else + return *reinterpret_cast(&m_s[index]); + } + + inline T get_x(void) const + { + return m_s[0]; + } + inline T get_y(void) const + { + ASSUME(N >= 2); + return m_s[1]; + } + inline T get_z(void) const + { + ASSUME(N >= 3); + return m_s[2]; + } + inline T get_w(void) const + { + ASSUME(N >= 4); + return m_s[3]; + } + + inline vec get_x_vector() const + { + return broadcast<0>(); + } + inline vec get_y_vector() const + { + return broadcast<1>(); + } + inline vec get_z_vector() const + { + return broadcast<2>(); + } + inline vec get_w_vector() const + { + return broadcast<3>(); + } + + inline T get_component(uint32_t i) const + { + return (*this)[i]; + } + + inline vec& set_x(T v) + { + m_s[0] = v; + return *this; + } + inline vec& set_y(T v) + { + ASSUME(N >= 2); + m_s[1] = v; + return *this; + } + inline vec& set_z(T v) + { + ASSUME(N >= 3); + m_s[2] = v; + return *this; + } + inline vec& set_w(T v) + { + ASSUME(N >= 4); + m_s[3] = v; + return *this; + } + + inline const T* get_ptr() const + { + return reinterpret_cast(&m_s[0]); + } + inline T* get_ptr() + { + return reinterpret_cast(&m_s[0]); + } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + 
inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint32_t i, uint32_t j) const + { + assert((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint32_t i, uint32_t j, uint32_t k) const + { + assert((i < N) && (j < N) && (k < N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + assert((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const + { + return m_s[N - 1] == 0; + } + inline bool is_vector() const + { + return is_dir(); + } + inline bool is_point() const + { + return m_s[N - 1] == 1; + } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + template + inline vec broadcast() const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint32_t i, uint32_t j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint32_t i, uint32_t j, uint32_t k, uint32_t l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator-() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+() const + { + return *this; + } + + inline vec& operator+=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator-=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator*=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; 
+ } + + inline vec& operator/=(const vec& other) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= other.m_s[i]; + return *this; + } + + inline vec& operator*=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator/=(T s) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + // component-wise multiply (not a dot product like in previous versions) + // just remarking it out because it's too ambiguous, use dot() or mul_components() instead +#if 0 + friend inline vec operator*(const vec& lhs, const vec& rhs) + { + return vec::mul_components(lhs, rhs); + } +#endif + + friend inline vec operator*(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator*(T val, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = val * rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/(const vec& lhs, T val) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator-(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + ASSUME(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross2(const vec& b) const + { + return cross2(*this, b); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + ASSUME(N >= 3); + return 
vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + inline vec<3, T> cross3(const vec& b) const + { + return cross3(*this, b); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + ASSUME(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline vec<3, T> cross(const vec& b) const + { + ASSUME(N >= 2); + return cross(*this, b); + } + + inline T dot(const vec& rhs) const + { + return dot(*this, rhs); + } + + inline vec dot_vector(const vec& rhs) const + { + return vec(dot(*this, rhs)); + } + + static inline T dot(const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint32_t i = 1; i < N; i++) + result += lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + inline T dot2(const vec& rhs) const + { + ASSUME(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + ASSUME(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T dot4(const vec& rhs) const + { + ASSUME(N >= 4); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2] + m_s[3] * rhs.m_s[3]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + sum += m_s[i] * m_s[i]; + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint32_t i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + 
+ inline vec inverse() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; + return result; + } + + // returns squared length (norm) + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint32_t i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast(1.0f / sqrt(n)); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + ASSUME(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l, h)); + return *this; + } + + inline vec& saturate() + { + return clamp(0.0f, 1.0f); + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint32_t i = 0; i < N; i++) + m_s[i] = static_cast(clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint32_t i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint32_t get_major_axis(void) 
const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint32_t get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint32_t r = 0; + for (uint32_t i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline void get_projection_axes(uint32_t& u, uint32_t& v) const + { + const int axis = get_major_axis(); + if (m_s[axis] < 0.0f) + { + v = next_wrap(axis, N); + u = next_wrap(v, N); + } + else + { + u = next_wrap(axis, N); + v = next_wrap(u, N); + } + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::min(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint32_t i = 1; i < N; i++) + result = std::max(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::min(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint32_t i = 1; i < N; i++) + result = std::max(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + *this -= (dot(dir) * dir); + return *this; + } + + inline vec get_remove_unit_direction(const vec& dir) const + { + return *this - (dot(dir) * dir); + } + + inline bool all_less(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] > b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool 
all_greater_equal(const vec& b) const + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; + return true; + } + + inline vec negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) + ret[1] = -m_s[1]; + if (N >= 3) + ret[2] = -m_s[2]; + + for (uint32_t i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint32_t i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + inline scalar_type perp_dot(const vec& b) const + { + ASSUME(N == 2); + return m_s[0] * b.m_s[1] - m_s[1] * b.m_s[0]; + } + + inline vec perp() const + { + ASSUME(N == 2); + return vec(-m_s[1], m_s[0]); + } + + inline vec get_floor() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = floor(m_s[i]); + return result; + } + + inline vec get_ceil() const + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = ceil(m_s[i]); + return result; + } + + // static helper methods + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec mul_add_components(const vec& a, const vec& b, const vec& c) + { + vec result; + for (uint32_t i = 0; i < N; i++) + result[i] = a.m_s[i] * b.m_s[i] + c.m_s[i]; + return result; + } + + static inline vec make_axis(uint32_t i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] == b[i]); + return ret; + } + + static inline vec not_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] != b[i]); + return ret; + } + + static inline vec less_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] < b[i]); + return ret; + } + + static inline vec 
less_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] <= b[i]); + return ret; + } + + static inline vec greater_equals_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] >= b[i]); + return ret; + } + + static inline vec greater_mask(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret[i] = (a[i] > b[i]); + return ret; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::max(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = std::min(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint32_t i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline bool equal_tol(const vec& a, const vec& b, float t) + { + for (uint32_t i = 0; i < N; i++) + if (!equal_tol(a.m_s[i], b.m_s[i], t)) + return false; + return true; + } + + inline bool equal_tol(const vec& b, float t) const + { + return equal_tol(*this, b, t); + } + +protected: + T m_s[N]; +}; + +typedef vec<1, double> vec1D; +typedef vec<2, double> vec2D; +typedef vec<3, double> vec3D; +typedef vec<4, double> vec4D; + +typedef vec<1, float> vec1F; + +typedef vec<2, float> vec2F; +typedef vector vec2F_array; + +typedef vec<3, float> vec3F; +typedef vector vec3F_array; + +typedef vec<4, float> vec4F; +typedef vector vec4F_array; + +typedef vec<2, uint32_t> vec2U; +typedef vec<3, uint32_t> vec3U; +typedef vec<2, int> vec2I; +typedef vec<3, int> vec3I; +typedef vec<4, int> vec4I; + +typedef vec<2, int16_t> vec2I16; +typedef vec<3, int16_t> vec3I16; + +inline vec2F rotate_point(const vec2F& p, float rad) +{ + float c = cos(rad); + float s = sin(rad); + + float x = p[0]; + 
float y = p[1]; + + return vec2F(x * c - y * s, x * s + y * c); +} + +class rect +{ +public: + inline rect() + { + } + + inline rect(eClear) + { + clear(); + } + + inline rect(eInitExpand) + { + init_expand(); + } + + // up to, but not including right/bottom + inline rect(int left, int top, int right, int bottom) + { + set(left, top, right, bottom); + } + + inline rect(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline rect(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline bool operator==(const rect& r) const + { + return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]); + } + + inline bool operator<(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[i] < r.m_corner[i]) + return true; + else if (!(m_corner[i] == r.m_corner[i])) + return false; + } + + return false; + } + + inline void clear() + { + m_corner[0].clear(); + m_corner[1].clear(); + } + + inline void set(int left, int top, int right, int bottom) + { + m_corner[0].set(left, top); + m_corner[1].set(right, bottom); + } + + inline void set(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline void set(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline uint32_t get_width() const + { + return m_corner[1][0] - m_corner[0][0]; + } + inline uint32_t get_height() const + { + return m_corner[1][1] - m_corner[0][1]; + } + + inline int get_left() const + { + return m_corner[0][0]; + } + inline int get_top() const + { + return m_corner[0][1]; + } + inline int get_right() const + { + return m_corner[1][0]; + } + inline int get_bottom() const + { + return m_corner[1][1]; + } + + inline bool is_empty() const + { + return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); + } + + inline uint32_t get_dimension(uint32_t axis) const + { + return m_corner[1][axis] - 
m_corner[0][axis]; + } + inline uint32_t get_area() const + { + return get_dimension(0) * get_dimension(1); + } + + inline const vec2I& operator[](uint32_t i) const + { + assert(i < 2); + return m_corner[i]; + } + inline vec2I& operator[](uint32_t i) + { + assert(i < 2); + return m_corner[i]; + } + + inline rect& translate(int x_ofs, int y_ofs) + { + m_corner[0][0] += x_ofs; + m_corner[0][1] += y_ofs; + m_corner[1][0] += x_ofs; + m_corner[1][1] += y_ofs; + return *this; + } + + inline rect& init_expand() + { + m_corner[0].set(INT_MAX); + m_corner[1].set(INT_MIN); + return *this; + } + + inline rect& expand(int x, int y) + { + m_corner[0][0] = std::min(m_corner[0][0], x); + m_corner[0][1] = std::min(m_corner[0][1], y); + m_corner[1][0] = std::max(m_corner[1][0], x + 1); + m_corner[1][1] = std::max(m_corner[1][1], y + 1); + return *this; + } + + inline rect& expand(const rect& r) + { + m_corner[0][0] = std::min(m_corner[0][0], r[0][0]); + m_corner[0][1] = std::min(m_corner[0][1], r[0][1]); + m_corner[1][0] = std::max(m_corner[1][0], r[1][0]); + m_corner[1][1] = std::max(m_corner[1][1], r[1][1]); + return *this; + } + + inline bool touches(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (r[1][i] <= m_corner[0][i]) + return false; + else if (r[0][i] >= m_corner[1][i]) + return false; + } + + return true; + } + + inline bool fully_within(const rect& r) const + { + for (uint32_t i = 0; i < 2; i++) + { + if (m_corner[0][i] < r[0][i]) + return false; + else if (m_corner[1][i] > r[1][i]) + return false; + } + + return true; + } + + inline bool intersect(const rect& r) + { + if (!touches(r)) + { + clear(); + return false; + } + + for (uint32_t i = 0; i < 2; i++) + { + m_corner[0][i] = std::max(m_corner[0][i], r[0][i]); + m_corner[1][i] = std::min(m_corner[1][i], r[1][i]); + } + + return true; + } + + inline bool contains(int x, int y) const + { + return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && + (y >= m_corner[0][1]) && (y < m_corner[1][1]); + } 
+ + inline bool contains(const vec2I& p) const + { + return contains(p[0], p[1]); + } + +private: + vec2I m_corner[2]; +}; + +inline rect make_rect(uint32_t width, uint32_t height) +{ + return rect(0, 0, width, height); +} + +struct color_quad_u8 +{ +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) +#endif + union + { + uint8_t m_c[4]; + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + }; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + inline color_quad_u8(eClear) : color_quad_u8(0, 0, 0, 0) { } + + inline color_quad_u8(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + set(cr, cg, cb, ca); + } + + inline color_quad_u8(uint8_t cy = 0, uint8_t ca = 255) + { + set(cy, ca); + } + + inline void clear() + { + set(0, 0, 0, 0); + } + + inline color_quad_u8& set(uint8_t cy, uint8_t ca = 255) + { + m_c[0] = cy; + m_c[1] = cy; + m_c[2] = cy; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set(uint8_t cr, uint8_t cg, uint8_t cb, uint8_t ca) + { + m_c[0] = cr; + m_c[1] = cg; + m_c[2] = cb; + m_c[3] = ca; + return *this; + } + + inline color_quad_u8& set_clamped(int cr, int cg, int cb, int ca) + { + m_c[0] = (uint8_t)clamp(cr, 0, 255); + m_c[1] = (uint8_t)clamp(cg, 0, 255); + m_c[2] = (uint8_t)clamp(cb, 0, 255); + m_c[3] = (uint8_t)clamp(ca, 0, 255); + return *this; + } + + color_quad_u8& set_alpha(int ca) { a = (uint8_t)clamp(ca, 0, 255); return *this; } + + inline uint8_t& operator[] (uint32_t i) { assert(i < 4); return m_c[i]; } + inline uint8_t operator[] (uint32_t i) const { assert(i < 4); return m_c[i]; } + + inline int get_luma() const { return (13938U * m_c[0] + 46869U * m_c[1] + 4729U * m_c[2] + 32768U) >> 16U; } // REC709 weightings + + inline bool operator== (const color_quad_u8& other) const + { + return (m_c[0] == other.m_c[0]) && (m_c[1] == other.m_c[1]) && (m_c[2] == other.m_c[2]) && (m_c[3] == other.m_c[3]); + } + + inline bool operator!= (const color_quad_u8& other) const + { + return !(*this == 
other); + } + + inline uint32_t squared_distance(const color_quad_u8& c, bool alpha = true) const + { + return square(r - c.r) + square(g - c.g) + square(b - c.b) + (alpha ? square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad_u8& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } +}; +typedef vector color_quad_u8_vec; + +inline uint32_t color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) +{ + if (perceptual) + { + const float l1 = e1.r * .2126f + e1.g * .715f + e1.b * .0722f; + const float cr1 = e1.r - l1; + const float cb1 = e1.b - l1; + + const float l2 = e2.r * .2126f + e2.g * .715f + e2.b * .0722f; + const float cr2 = e2.r - l2; + const float cb2 = e2.b - l2; + + const float dl = l1 - l2; + const float dcr = cr1 - cr2; + const float dcb = cb1 - cb2; + + uint32_t d = static_cast( + 32.0f * 4.0f * dl * dl + + 32.0f * 2.0f * (.5f / (1.0f - .2126f)) * (.5f / (1.0f - .2126f)) * dcr * dcr + + 32.0f * .25f * (.5f / (1.0f - .0722f)) * (.5f / (1.0f - .0722f)) * dcb * dcb); + + if (alpha) + { + int da = (int)e1.a - (int)e2.a; + + d += static_cast(128.0f * da * da); + } + + return d; + } + else + return e1.squared_distance(e2, alpha); +} + +extern color_quad_u8 g_white_color_u8, g_black_color_u8, g_red_color_u8, g_green_color_u8, g_blue_color_u8, g_yellow_color_u8, g_purple_color_u8, g_magenta_color_u8, g_cyan_color_u8; + +class image_u8 +{ +public: + image_u8() : + m_width(0), m_height(0), + m_clip_rect(cClear) + { + } + + image_u8(uint32_t width, uint32_t height) : + m_width(width), m_height(height), + m_clip_rect(0, 0, width, height) + { + m_pixels.resize(width * height); + } + + inline const color_quad_u8_vec& get_pixels() const { return m_pixels; } + inline color_quad_u8_vec& get_pixels() { return m_pixels; } + + inline uint32_t width() const { return m_width; } + inline uint32_t height() const { return m_height; } + inline uint32_t total_pixels() const { return m_width * m_height; } + 
+ inline const rect& get_clip_rect() const { return m_clip_rect; } + + inline void set_clip_rect(const rect& r) + { + assert((r.get_left() >= 0) && (r.get_top() >= 0) && (r.get_right() <= (int)m_width) && (r.get_bottom() <= (int)m_height)); + + m_clip_rect = r; + } + + inline void clear_clip_rect() { m_clip_rect.set(0, 0, m_width, m_height); } + + inline bool is_clipped(int x, int y) const { return !m_clip_rect.contains(x, y); } + + inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } + + inline color_quad_u8& operator()(uint32_t x, uint32_t y) { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + inline const color_quad_u8& operator()(uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); return m_pixels[x + m_width * y]; } + + image_u8& clear() + { + m_width = m_height = 0; + m_clip_rect.clear(); + m_pixels.clear(); + return *this; + } + + image_u8& init(uint32_t width, uint32_t height) + { + clear(); + + m_width = width; + m_height = height; + m_clip_rect.set(0, 0, width, height); + m_pixels.resize(width * height); + return *this; + } + + image_u8& set_all(const color_quad_u8& p) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = p; + return *this; + } + + inline const color_quad_u8& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline color_quad_u8& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline image_u8& set_pixel_clipped(int x, int y, const color_quad_u8& c) + { + if (!is_clipped(x, y)) + (*this)(x, y) = c; + return *this; + } + + inline image_u8& fill_box(int x, int y, int w, int h, const color_quad_u8& c) + { + for (int y_ofs = 0; y_ofs < h; y_ofs++) + for (int x_ofs = 0; x_ofs < w; x_ofs++) + set_pixel_clipped(x + x_ofs, y + y_ofs, c); + return *this; + } + + void invert_box(int inX, int inY, int inW, int inH) + { + for (int y = 0; y < inH; y++) + { + 
const uint32_t yy = inY + y; + + for (int x = 0; x < inW; x++) + { + const uint32_t xx = inX + x; + + if (is_clipped(xx, yy)) + continue; + + color_quad_u8 c((*this)(xx, yy)); + + c.r = 255 - c.r; + c.g = 255 - c.g; + c.b = 255 - c.b; + + set_pixel_clipped(xx, yy, c); + } + } + } + + image_u8& crop_dup_borders(uint32_t w, uint32_t h) + { + const uint32_t orig_w = m_width, orig_h = m_height; + + crop(w, h); + + if (orig_w && orig_h) + { + if (m_width > orig_w) + { + for (uint32_t x = orig_w; x < m_width; x++) + for (uint32_t y = 0; y < m_height; y++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + + if (m_height > orig_h) + { + for (uint32_t y = orig_h; y < m_height; y++) + for (uint32_t x = 0; x < m_width; x++) + set_pixel_clipped(x, y, get_clamped(std::min(x, orig_w - 1U), std::min(y, orig_h - 1U))); + } + } + return *this; + } + + image_u8& crop(uint32_t new_width, uint32_t new_height) + { + if ((m_width == new_width) && (m_height == new_height)) + return *this; + + image_u8 new_image(new_width, new_height); + + const uint32_t w = std::min(m_width, new_width); + const uint32_t h = std::min(m_height, new_height); + + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + new_image(x, y) = (*this)(x, y); + + return swap(new_image); + } + + image_u8& swap(image_u8& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pixels, other.m_pixels); + std::swap(m_clip_rect, other.m_clip_rect); + return *this; + } + + // No clipping + inline void get_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, color_quad_u8* pPixels) const + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(pPixels + y * width, &(*this)(bx * width, by * height + y), width * sizeof(color_quad_u8)); + } + + inline void get_block_clamped(uint32_t bx, uint32_t by, uint32_t width, uint32_t 
height, color_quad_u8* pPixels) const + { + for (uint32_t y = 0; y < height; y++) + for (uint32_t x = 0; x < width; x++) + pPixels[x + y * width] = get_clamped(bx * width + x, by * height + y); + } + + // No clipping + inline void set_block(uint32_t bx, uint32_t by, uint32_t width, uint32_t height, const color_quad_u8* pPixels) + { + assert((bx * width + width) <= m_width); + assert((by * height + height) <= m_height); + + for (uint32_t y = 0; y < height; y++) + memcpy(&(*this)(bx * width, by * height + y), pPixels + y * width, width * sizeof(color_quad_u8)); + } + + image_u8& swizzle(uint32_t r, uint32_t g, uint32_t b, uint32_t a) + { + assert((r | g | b | a) <= 3); + for (uint32_t y = 0; y < m_height; y++) + { + for (uint32_t x = 0; x < m_width; x++) + { + color_quad_u8 tmp((*this)(x, y)); + (*this)(x, y).set(tmp[r], tmp[g], tmp[b], tmp[a]); + } + } + + return *this; + } + + struct pixel_coord + { + uint16_t m_x, m_y; + pixel_coord() { } + pixel_coord(uint32_t x, uint32_t y) : m_x((uint16_t)x), m_y((uint16_t)y) { } + }; + + uint32_t flood_fill(int x, int y, const color_quad_u8& c, const color_quad_u8& b, vector* pSet_pixels = nullptr); + + void draw_line(int xs, int ys, int xe, int ye, const color_quad_u8& color); + + inline void set_pixel_clipped_alphablend(int x, int y, const color_quad_u8& c) + { + if (is_clipped(x, y)) + return; + + color_quad_u8 ct(m_pixels[x + y * m_width]); + + ct.r = static_cast(ct.r + ((c.r - ct.r) * c.a) / 255); + ct.g = static_cast(ct.g + ((c.g - ct.g) * c.a) / 255); + ct.b = static_cast(ct.b + ((c.b - ct.b) * c.a) / 255); + + m_pixels[x + y * m_width] = ct; + } + +private: + color_quad_u8_vec m_pixels; + uint32_t m_width, m_height; + rect m_clip_rect; + + struct fill_segment + { + int16_t m_y, m_xl, m_xr, m_dy; + + fill_segment(int y, int xl, int xr, int dy) : + m_y((int16_t)y), m_xl((int16_t)xl), m_xr((int16_t)xr), m_dy((int16_t)dy) + { + } + }; + + inline bool flood_fill_is_inside(int x, int y, const color_quad_u8& b) const + { + if 
(is_clipped(x, y)) + return false; + + return (*this)(x, y) == b; + } + + void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_quad_u8& color); + + void draw_aaline_pixel(int x, int y, int a, color_quad_u8 color) + { + color.a = static_cast(255 - a); + set_pixel_clipped_alphablend(x, y, color); + } +}; + +bool load_png(const char* pFilename, image_u8& img); + +bool save_png(const char* pFilename, const image_u8& img, bool save_alpha); + +class image_metrics +{ +public: + double m_max, m_mean, m_mean_squared, m_root_mean_squared, m_peak_snr; + + image_metrics() + { + clear(); + } + + void clear() + { + memset(this, 0, sizeof(*this)); + } + + void compute(const image_u8& a, const image_u8& b, uint32_t first_channel, uint32_t num_channels) + { + const bool average_component_error = true; + + const uint32_t width = std::min(a.width(), b.width()); + const uint32_t height = std::min(a.height(), b.height()); + + assert((first_channel < 4U) && (first_channel + num_channels <= 4U)); + + // Histogram approach originally due to Charles Bloom. 
+ double hist[256]; + memset(hist, 0, sizeof(hist)); + + for (uint32_t y = 0; y < height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + if (!num_channels) + hist[iabs(ca.get_luma() - cb.get_luma())]++; + else + { + for (uint32_t c = 0; c < num_channels; c++) + hist[iabs(ca[first_channel + c] - cb[first_channel + c])]++; + } + } + } + + m_max = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint32_t i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + + m_max = std::max(m_max, i); + + double x = i * hist[i]; + + sum += x; + sum2 += i * x; + } + + // See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html + double total_values = width * height; + + if (average_component_error) + total_values *= clamp(num_channels, 1, 4); + + m_mean = clamp(sum / total_values, 0.0f, 255.0f); + m_mean_squared = clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); + + m_root_mean_squared = sqrt(m_mean_squared); + + if (!m_root_mean_squared) + m_peak_snr = 100.0f; + else + m_peak_snr = clamp(log10(255.0f / m_root_mean_squared) * 20.0f, 0.0f, 100.0f); + } +}; + +class imagef +{ +public: + imagef() : + m_width(0), m_height(0), m_pitch(0) + { + } + + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + m_width(0), m_height(0), m_pitch(0) + { + resize(w, h, p); + } + + imagef(const imagef& other) : + m_width(0), m_height(0), m_pitch(0) + { + *this = other; + } + + imagef& swap(imagef& other) + { + std::swap(m_width, other.m_width); + std::swap(m_height, other.m_height); + std::swap(m_pitch, other.m_pitch); + m_pixels.swap(other.m_pixels); + return *this; + } + + imagef& operator= (const imagef& rhs) + { + if (this != &rhs) + { + m_width = rhs.m_width; + m_height = rhs.m_height; + m_pitch = rhs.m_pitch; + m_pixels = rhs.m_pixels; + } + return *this; + } + + imagef& clear() + { + m_width = 0; + m_height = 0; + m_pitch = 0; + m_pixels.resize(0); + return *this; + } + + imagef& 
set(const image_u8& src, const vec4F& scale = vec4F(1), const vec4F& bias = vec4F(0)) + { + const uint32_t width = src.width(); + const uint32_t height = src.height(); + + resize(width, height); + + for (int y = 0; y < (int)height; y++) + { + for (uint32_t x = 0; x < width; x++) + { + const color_quad_u8& src_pixel = src(x, y); + (*this)(x, y).set((float)src_pixel.r * scale[0] + bias[0], (float)src_pixel.g * scale[1] + bias[1], (float)src_pixel.b * scale[2] + bias[2], (float)src_pixel.a * scale[3] + bias[3]); + } + } + + return *this; + } + + imagef& resize(const imagef& other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return resize(other.get_width(), other.get_height(), p, background); + } + + imagef& resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + return crop(w, h, p, background); + } + + imagef& set_all(const vec4F& c) + { + for (uint32_t i = 0; i < m_pixels.size(); i++) + m_pixels[i] = c; + return *this; + } + + imagef& fill_box(uint32_t x, uint32_t y, uint32_t w, uint32_t h, const vec4F& c) + { + for (uint32_t iy = 0; iy < h; iy++) + for (uint32_t ix = 0; ix < w; ix++) + set_pixel_clipped(x + ix, y + iy, c); + return *this; + } + + imagef& crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0, 0, 0, 1)) + { + if (p == UINT32_MAX) + p = w; + + if ((w == m_width) && (m_height == h) && (m_pitch == p)) + return *this; + + if ((!w) || (!h) || (!p)) + { + clear(); + return *this; + } + + vec4F_array cur_state; + cur_state.swap(m_pixels); + + m_pixels.resize(p * h); + + for (uint32_t y = 0; y < h; y++) + { + for (uint32_t x = 0; x < w; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixels[x + y * p] = cur_state[x + y * m_pitch]; + else + m_pixels[x + y * p] = background; + } + } + + m_width = w; + m_height = h; + m_pitch = p; + + return *this; + } + + inline const vec4F& operator() (uint32_t x, uint32_t y) const { assert(x < m_width&& y 
< m_height); return m_pixels[x + y * m_pitch]; } + inline vec4F& operator() (uint32_t x, uint32_t y) { assert(x < m_width&& y < m_height); return m_pixels[x + y * m_pitch]; } + + inline const vec4F& get_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + inline vec4F& get_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width - 1), clamp(y, 0, m_height - 1)); } + + inline const vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) const + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline vec4F& get_clamped_or_wrapped(int x, int y, bool wrap_u, bool wrap_v) + { + x = wrap_u ? posmod(x, m_width) : clamp(x, 0, m_width - 1); + y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); + return m_pixels[x + y * m_pitch]; + } + + inline imagef& set_pixel_clipped(int x, int y, const vec4F& c) + { + if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) + (*this)(x, y) = c; + return *this; + } + + // Very straightforward blit with full clipping. Not fast, but it works. 
+ imagef& blit(const imagef& src, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y) + { + for (int y = 0; y < src_h; y++) + { + const int sy = src_y + y; + if (sy < 0) + continue; + else if (sy >= (int)src.get_height()) + break; + + for (int x = 0; x < src_w; x++) + { + const int sx = src_x + x; + if (sx < 0) + continue; + else if (sx >= (int)src.get_height()) + break; + + set_pixel_clipped(dst_x + x, dst_y + y, src(sx, sy)); + } + } + + return *this; + } + + const imagef& extract_block_clamped(vec4F* pDst, uint32_t src_x, uint32_t src_y, uint32_t w, uint32_t h) const + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + *pDst++ = get_clamped(src_x + x, src_y + y); + return *this; + } + + imagef& set_block_clipped(const vec4F* pSrc, uint32_t dst_x, uint32_t dst_y, uint32_t w, uint32_t h) + { + for (uint32_t y = 0; y < h; y++) + for (uint32_t x = 0; x < w; x++) + set_pixel_clipped(dst_x + x, dst_y + y, *pSrc++); + return *this; + } + + inline uint32_t get_width() const { return m_width; } + inline uint32_t get_height() const { return m_height; } + inline uint32_t get_pitch() const { return m_pitch; } + inline uint32_t get_total_pixels() const { return m_width * m_height; } + + inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; } + inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; } + inline uint32_t get_total_blocks(uint32_t w, uint32_t h) const { return get_block_width(w) * get_block_height(h); } + + inline const vec4F_array& get_pixels() const { return m_pixels; } + inline vec4F_array& get_pixels() { return m_pixels; } + + inline const vec4F* get_ptr() const { return &m_pixels[0]; } + inline vec4F* get_ptr() { return &m_pixels[0]; } + +private: + uint32_t m_width, m_height, m_pitch; // all in pixels + vec4F_array m_pixels; +}; + +enum +{ + cComputeGaussianFlagNormalize = 1, + cComputeGaussianFlagPrint = 2, + cComputeGaussianFlagNormalizeCenterToOne = 4 +}; + 
+// size_x/y should be odd +void compute_gaussian_kernel(float* pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags); + +void gaussian_filter(imagef& dst, const imagef& orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping = false, uint32_t width_divisor = 1, uint32_t height_divisor = 1); + +vec4F compute_ssim(const imagef& a, const imagef& b); + +vec4F compute_ssim(const image_u8& a, const image_u8& b, bool luma); + +struct block8 +{ + uint64_t m_vals[1]; +}; + +typedef vector block8_vec; + +struct block16 +{ + uint64_t m_vals[2]; +}; + +typedef vector block16_vec; + +//bool save_dds(const char* pFilename, uint32_t width, uint32_t height, const void* pBlocks, uint32_t pixel_format_bpp, DXGI_FORMAT dxgi_format, bool srgb, bool force_dx10_header); + +void strip_extension(string& s); +void strip_path(string& s); + +uint32_t hash_hsieh(const uint8_t* pBuf, size_t len); + +// https://www.johndcook.com/blog/standard_deviation/ +// This class is for small numbers of integers, so precision shouldn't be an issue. +class tracked_stat +{ +public: + tracked_stat() { clear(); } + + void clear() { m_num = 0; m_total = 0; m_total2 = 0; } + + void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; } + + tracked_stat& operator += (uint32_t val) { update(val); return *this; } + + uint32_t get_number_of_values() const { return m_num; } + uint64_t get_total() const { return m_total; } + uint64_t get_total2() const { return m_total2; } + + float get_mean() const { return m_num ? (float)m_total / m_num : 0.0f; }; + + float get_variance() const { return m_num ? ((float)(m_num * m_total2 - m_total * m_total)) / (m_num * m_num) : 0.0f; } + float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; } + + float get_sample_variance() const { return (m_num > 1) ? 
((float)(m_num * m_total2 - m_total * m_total)) / (m_num * (m_num - 1)) : 0.0f; } + float get_sample_std_dev() const { return (m_num > 1) ? sqrtf(get_sample_variance()) : 0.0f; } + +private: + uint32_t m_num; + uint64_t m_total; + uint64_t m_total2; +}; + +inline float compute_covariance(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, bool sample) +{ + const uint32_t n = a.get_number_of_values(); + assert(n == b.get_number_of_values()); + + if (!n) + { + assert(0); + return 0.0f; + } + if ((sample) && (n == 1)) + { + assert(0); + return 0; + } + + const float mean_a = a.get_mean(); + const float mean_b = b.get_mean(); + + float total = 0.0f; + for (uint32_t i = 0; i < n; i++) + total += (pA[i] - mean_a) * (pB[i] - mean_b); + + return total / (sample ? (n - 1) : n); +} + +inline float compute_correlation_coefficient(const float* pA, const float* pB, const tracked_stat& a, const tracked_stat& b, float c, bool sample) +{ + if (!a.get_number_of_values()) + return 1.0f; + + float covar = compute_covariance(pA, pB, a, b, sample); + float std_dev_a = sample ? a.get_sample_std_dev() : a.get_std_dev(); + float std_dev_b = sample ? 
b.get_sample_std_dev() : b.get_std_dev(); + float denom = std_dev_a * std_dev_b + c; + + if (denom < .0000125f) + return 1.0f; + + float result = (covar + c) / denom; + + return clamp(result, -1.0f, 1.0f); +} + +float compute_block_max_std_dev(const color_quad_u8* pPixels, uint32_t block_width, uint32_t block_height, uint32_t num_comps); + +class rand // Convenience wrapper around a std::mt19937 engine +{ + std::mt19937 m_mt; + +public: + rand() { } + + rand(uint32_t s) { seed(s); } + void seed(uint32_t s) { m_mt.seed(s); } + + // Uniform integer in the closed range [l,h] + int irand(int l, int h) { std::uniform_int_distribution<int> d(l, h); return d(m_mt); } + + uint32_t urand32() { return static_cast<uint32_t>(irand(INT32_MIN, INT32_MAX)); } + + bool bit() { return irand(0, 1) == 1; } + + uint8_t byte() { return static_cast<uint8_t>(urand32()); } + + // Uniform float in the half-open range [l,h) + float frand(float l, float h) { std::uniform_real_distribution<float> d(l, h); return d(m_mt); } + + float gaussian(float mean, float stddev) { std::normal_distribution<float> d(mean, stddev); return d(m_mt); } +}; + +bool save_astc_file(const char* pFilename, block16_vec& blocks, uint32_t width, uint32_t height, uint32_t block_width, uint32_t block_height); +bool load_astc_file(const char* pFilename, block16_vec& blocks, uint32_t& width, uint32_t& height, uint32_t& block_width, uint32_t& block_height); + +class value_stats // Accumulates samples: running sum, sum of squares, min/max, plus a copy of every value +{ +public: + value_stats() + { + clear(); + } + + void clear() + { + m_sum = 0; + m_sum2 = 0; + m_num = 0; + m_min = 1e+39; + m_max = -1e+39; + m_vals.clear(); + } + + void add(double val) + { + m_sum += val; + m_sum2 += val * val; + + m_num++; + + m_min = std::min(m_min, val); + m_max = std::max(m_max, val); + + m_vals.push_back(val); + } + + void add(int val) + { + add(static_cast<double>(val)); + } + + void add(uint32_t val) + { + add(static_cast<double>(val)); + } + + void add(int64_t val) + { + add(static_cast<double>(val)); + } + + void add(uint64_t val) + { + add(static_cast<double>(val)); + } + + void print(const char* pPrefix = "") + { + if (!m_vals.size()) + printf("%s: Empty\n", pPrefix); + else + printf("%s: 
Samples: %llu, Total: %f, Avg: %f, Std Dev: %f, Min: %f, Max: %f, Mean: %f\n", + pPrefix, (unsigned long long)get_num(), get_total(), get_average(), get_std_dev(), get_min(), get_max(), get_mean()); + } + + double get_total() const + { + return m_sum; + } + + double get_average() const + { + return m_num ? (m_sum / m_num) : 0.0f; + } + + double get_min() const + { + return m_min; + } + + double get_max() const + { + return m_max; + } + + uint64_t get_num() const + { + return m_num; + } + + double get_val(uint32_t index) const + { + return m_vals[index]; + } + + // Returns population standard deviation + double get_std_dev() const + { + if (!m_num) + return 0.0f; + + // FP rounding can leave the variance slightly negative; clamp to zero so sqrt() cannot return NaN + return sqrt(std::max(0.0, (m_sum2 - ((m_sum * m_sum) / m_num)) / m_num)); + } + + double get_mean() const // NOTE: returns the median (middle element of the sorted samples), not the arithmetic mean; use get_average() for that + { + if (!m_num) + return 0.0f; + + vector<double> sorted_vals(m_vals); + std::sort(sorted_vals.begin(), sorted_vals.end()); + + return sorted_vals[sorted_vals.size() / 2]; + } + +private: + double m_sum; + double m_sum2; + + uint64_t m_num; + + double m_min; + double m_max; + + mutable vector<double> m_vals; +}; + +uint32_t get_deflate_size(const void* pData, size_t data_size); + +} // namespace utils + +#ifdef _MSC_VER +#pragma warning (pop) +#endif diff --git a/libkram/cgltf/LICENSE b/libkram/cgltf/LICENSE new file mode 100644 index 00000000..599d9341 --- /dev/null +++ b/libkram/cgltf/LICENSE @@ -0,0 +1,7 @@ +Copyright (c) 2018-2021 Johannes Kuhlmann + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the 
Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/libkram/cgltf/README.md b/libkram/cgltf/README.md new file mode 100644 index 00000000..e2aa6b40 --- /dev/null +++ b/libkram/cgltf/README.md @@ -0,0 +1,162 @@ +# :diamond_shape_with_a_dot_inside: cgltf +**Single-file/stb-style C glTF loader and writer** + +[![Build Status](https://github.com/jkuhlmann/cgltf/workflows/build/badge.svg)](https://github.com/jkuhlmann/cgltf/actions) + +Used in: [bgfx](https://github.com/bkaradzic/bgfx), [Filament](https://github.com/google/filament), [gltfpack](https://github.com/zeux/meshoptimizer/tree/master/gltf), [raylib](https://github.com/raysan5/raylib), [Unigine](https://developer.unigine.com/en/docs/2.14.1/third_party?rlang=cpp#cgltf), and more! 
+ +## Usage: Loading +Loading from file: +```c +#define CGLTF_IMPLEMENTATION +#include "cgltf.h" + +cgltf_options options = {0}; +cgltf_data* data = NULL; +cgltf_result result = cgltf_parse_file(&options, "scene.gltf", &data); +if (result == cgltf_result_success) +{ + /* TODO make awesome stuff */ + cgltf_free(data); +} +``` + +Loading from memory: +```c +#define CGLTF_IMPLEMENTATION +#include "cgltf.h" + +void* buf; /* Pointer to glb or gltf file data */ +size_t size; /* Size of the file data */ + +cgltf_options options = {0}; +cgltf_data* data = NULL; +cgltf_result result = cgltf_parse(&options, buf, size, &data); +if (result == cgltf_result_success) +{ + /* TODO make awesome stuff */ + cgltf_free(data); +} +``` + +Note that cgltf does not load the contents of extra files such as buffers or images into memory by default. You'll need to read these files yourself using URIs from `data.buffers[]` or `data.images[]` respectively. +For buffer data, you can alternatively call `cgltf_load_buffers`, which will use `FILE*` APIs to open and read buffer files. This automatically decodes base64 data URIs in buffers. For data URIs in images, you will need to use `cgltf_load_buffer_base64`. + +**For more in-depth documentation and a description of the public interface refer to the top of the `cgltf.h` file.** + +## Usage: Writing +When writing glTF data, you need a valid `cgltf_data` structure that represents a valid glTF document. You can construct such a structure yourself or load it using the loader functions described above. The writer functions do not deallocate any memory. So, you either have to do it manually or call `cgltf_free()` if you got the data by loading it from a glTF document. 
+ +Writing to file: +```c +#define CGLTF_IMPLEMENTATION +#define CGLTF_WRITE_IMPLEMENTATION +#include "cgltf_write.h" + +cgltf_options options = {0}; +cgltf_data* data = /* TODO must be valid data */; +cgltf_result result = cgltf_write_file(&options, "out.gltf", data); +if (result != cgltf_result_success) +{ + /* TODO handle error */ +} +``` + +Writing to memory: +```c +#define CGLTF_IMPLEMENTATION +#define CGLTF_WRITE_IMPLEMENTATION +#include "cgltf_write.h" +cgltf_options options = {0}; +cgltf_data* data = /* TODO must be valid data */; + +cgltf_size size = cgltf_write(&options, NULL, 0, data); + +char* buf = malloc(size); + +cgltf_size written = cgltf_write(&options, buf, size, data); +if (written != size) +{ + /* TODO handle error */ +} +``` + +Note that cgltf does not write the contents of extra files such as buffers or images. You'll need to write this data yourself. + +**For more in-depth documentation and a description of the public interface refer to the top of the `cgltf_write.h` file.** + + +## Features +cgltf supports core glTF 2.0: +- glb (binary files) and gltf (JSON files) +- meshes (including accessors, buffer views, buffers) +- materials (including textures, samplers, images) +- scenes and nodes +- skins +- animations +- cameras +- morph targets +- extras data + +cgltf also supports some glTF extensions: +- EXT_mesh_gpu_instancing +- EXT_meshopt_compression +- KHR_draco_mesh_compression (requires a library like [Google's Draco](https://github.com/google/draco) for decompression though) +- KHR_lights_punctual +- KHR_materials_clearcoat +- KHR_materials_emissive_strength +- KHR_materials_ior +- KHR_materials_iridescence +- KHR_materials_pbrSpecularGlossiness +- KHR_materials_sheen +- KHR_materials_specular +- KHR_materials_transmission +- KHR_materials_unlit +- KHR_materials_variants +- KHR_materials_volume +- KHR_materials_anisotropy +- KHR_texture_basisu (requires a library like [Binomial Basisu](https://github.com/BinomialLLC/basis_universal) for 
transcoding to native compressed texture) +- KHR_texture_transform + +cgltf does **not** yet support unlisted extensions. However, unlisted extensions can be accessed via the "extensions" member on objects. + +## Building +The easiest approach is to integrate the `cgltf.h` header file into your project. If you are unfamiliar with single-file C libraries (also known as stb-style libraries), this is how it goes: + +1. Include `cgltf.h` where you need the functionality. +1. Have exactly one source file that defines `CGLTF_IMPLEMENTATION` before including `cgltf.h`. +1. Use the cgltf functions as described above. + +Support for writing can be found in a separate file called `cgltf_write.h` (which includes `cgltf.h`). Building it works analogously using the `CGLTF_WRITE_IMPLEMENTATION` define. + +## Contributing +Everyone is welcome to contribute to the library. If you find any problems, you can submit them using [GitHub's issue system](https://github.com/jkuhlmann/cgltf/issues). If you want to contribute code, you should fork the project and then send a pull request. + + +## Dependencies +None. + +C headers being used by the implementation: +``` +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <assert.h> // If asserts are enabled. +``` + +Note, this library has a copy of the [JSMN JSON parser](https://github.com/zserge/jsmn) embedded in its source. + +## Testing +There is a Python script in the `test/` folder that retrieves the glTF 2.0 sample files from the glTF-Sample-Models repository (https://github.com/KhronosGroup/glTF-Sample-Models/tree/master/2.0) and runs the library against all gltf and glb files. + +Here's one way to build and run the test: + + cd test ; mkdir build ; cd build ; cmake .. -DCMAKE_BUILD_TYPE=Debug + make -j + cd .. + ./test_all.py + +There is also an llvm-fuzz test in `fuzz/`. See http://llvm.org/docs/LibFuzzer.html for more information. 
diff --git a/libkram/cgltf/cgltf.h b/libkram/cgltf/cgltf.h new file mode 100644 index 00000000..af24c65e --- /dev/null +++ b/libkram/cgltf/cgltf.h @@ -0,0 +1,7050 @@ +/** + * cgltf - a single-file glTF 2.0 parser written in C99. + * + * Version: 1.13 + * + * Website: https://github.com/jkuhlmann/cgltf + * + * Distributed under the MIT License, see notice at the end of this file. + * + * Building: + * Include this file where you need the struct and function + * declarations. Have exactly one source file where you define + * `CGLTF_IMPLEMENTATION` before including this file to get the + * function definitions. + * + * Reference: + * `cgltf_result cgltf_parse(const cgltf_options*, const void*, + * cgltf_size, cgltf_data**)` parses both glTF and GLB data. If + * this function returns `cgltf_result_success`, you have to call + * `cgltf_free()` on the created `cgltf_data*` variable. + * Note that contents of external files for buffers and images are not + * automatically loaded. You'll need to read these files yourself using + * URIs in the `cgltf_data` structure. + * + * `cgltf_options` is the struct passed to `cgltf_parse()` to control + * parts of the parsing process. You can use it to force the file type + * and provide memory allocation as well as file operation callbacks. + * Should be zero-initialized to trigger default behavior. + * + * `cgltf_data` is the struct allocated and filled by `cgltf_parse()`. + * It generally mirrors the glTF format as described by the spec (see + * https://github.com/KhronosGroup/glTF/tree/master/specification/2.0). + * + * `void cgltf_free(cgltf_data*)` frees the allocated `cgltf_data` + * variable. + * + * `cgltf_result cgltf_load_buffers(const cgltf_options*, cgltf_data*, + * const char* gltf_path)` can be optionally called to open and read buffer + * files using the `FILE*` APIs. The `gltf_path` argument is the path to + * the original glTF file, which allows the parser to resolve the path to + * buffer files. 
+ * + * `cgltf_result cgltf_load_buffer_base64(const cgltf_options* options, + * cgltf_size size, const char* base64, void** out_data)` decodes + * base64-encoded data content. Used internally by `cgltf_load_buffers()`. + * This is useful when decoding data URIs in images. + * + * `cgltf_result cgltf_parse_file(const cgltf_options* options, const + * char* path, cgltf_data** out_data)` can be used to open the given + * file using `FILE*` APIs and parse the data using `cgltf_parse()`. + * + * `cgltf_result cgltf_validate(cgltf_data*)` can be used to do additional + * checks to make sure the parsed glTF data is valid. + * + * `cgltf_node_transform_local` converts the translation / rotation / scale properties of a node + * into a mat4. + * + * `cgltf_node_transform_world` calls `cgltf_node_transform_local` on every ancestor in order + * to compute the root-to-node transformation. + * + * `cgltf_accessor_unpack_floats` reads in the data from an accessor, applies sparse data (if any), + * and converts them to floating point. Assumes that `cgltf_load_buffers` has already been called. + * By passing null for the output pointer, users can find out how many floats are required in the + * output buffer. + * + * `cgltf_num_components` is a tiny utility that tells you the dimensionality of + * a certain accessor type. This can be used before `cgltf_accessor_unpack_floats` to help allocate + * the necessary amount of memory. `cgltf_component_size` and `cgltf_calc_size` exist for + * similar purposes. + * + * `cgltf_accessor_read_float` reads a certain element from a non-sparse accessor and converts it to + * floating point, assuming that `cgltf_load_buffers` has already been called. The passed-in element + * size is the number of floats in the output buffer, which should be in the range [1, 16]. Returns + * false if the passed-in element_size is too small, or if the accessor is sparse. 
+ * + * `cgltf_accessor_read_uint` is similar to its floating-point counterpart, but limited to reading + * vector types and does not support matrix types. The passed-in element size is the number of uints + * in the output buffer, which should be in the range [1, 4]. Returns false if the passed-in + * element_size is too small, or if the accessor is sparse. + * + * `cgltf_accessor_read_index` is similar to its floating-point counterpart, but it returns size_t + * and only works with single-component data types. + * + * `cgltf_copy_extras_json` allows users to retrieve the "extras" data that can be attached to many + * glTF objects (which can be arbitrary JSON data). This is a legacy function, consider using + * cgltf_extras::data directly instead. You can parse this data using your own JSON parser + * or, if you've included the cgltf implementation, using the integrated JSMN JSON parser. + */ +#ifndef CGLTF_H_INCLUDED__ +#define CGLTF_H_INCLUDED__ + +#include <stddef.h> +#include <stdint.h> /* For uint8_t, uint32_t */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef size_t cgltf_size; +typedef long long int cgltf_ssize; +typedef float cgltf_float; +typedef int cgltf_int; +typedef unsigned int cgltf_uint; +typedef int cgltf_bool; + +typedef enum cgltf_file_type +{ + cgltf_file_type_invalid, + cgltf_file_type_gltf, + cgltf_file_type_glb, + cgltf_file_type_max_enum +} cgltf_file_type; + +typedef enum cgltf_result +{ + cgltf_result_success, + cgltf_result_data_too_short, + cgltf_result_unknown_format, + cgltf_result_invalid_json, + cgltf_result_invalid_gltf, + cgltf_result_invalid_options, + cgltf_result_file_not_found, + cgltf_result_io_error, + cgltf_result_out_of_memory, + cgltf_result_legacy_gltf, + cgltf_result_max_enum +} cgltf_result; + +typedef struct cgltf_memory_options +{ + void* (*alloc_func)(void* user, cgltf_size size); + void (*free_func) (void* user, void* ptr); + void* user_data; +} cgltf_memory_options; + +typedef struct cgltf_file_options +{ + 
cgltf_result(*read)(const struct cgltf_memory_options* memory_options, const struct cgltf_file_options* file_options, const char* path, cgltf_size* size, void** data); + void (*release)(const struct cgltf_memory_options* memory_options, const struct cgltf_file_options* file_options, void* data); + void* user_data; +} cgltf_file_options; + +typedef struct cgltf_options +{ + cgltf_file_type type; /* invalid == auto detect */ + cgltf_size json_token_count; /* 0 == auto */ + cgltf_memory_options memory; + cgltf_file_options file; +} cgltf_options; + +typedef enum cgltf_buffer_view_type +{ + cgltf_buffer_view_type_invalid, + cgltf_buffer_view_type_indices, + cgltf_buffer_view_type_vertices, + cgltf_buffer_view_type_max_enum +} cgltf_buffer_view_type; + +typedef enum cgltf_attribute_type +{ + cgltf_attribute_type_invalid, + cgltf_attribute_type_position, + cgltf_attribute_type_normal, + cgltf_attribute_type_tangent, + cgltf_attribute_type_texcoord, + cgltf_attribute_type_color, + cgltf_attribute_type_joints, + cgltf_attribute_type_weights, + cgltf_attribute_type_custom, + cgltf_attribute_type_max_enum +} cgltf_attribute_type; + +typedef enum cgltf_component_type +{ + cgltf_component_type_invalid, + cgltf_component_type_r_8, /* BYTE */ + cgltf_component_type_r_8u, /* UNSIGNED_BYTE */ + cgltf_component_type_r_16, /* SHORT */ + cgltf_component_type_r_16u, /* UNSIGNED_SHORT */ + cgltf_component_type_r_32u, /* UNSIGNED_INT */ + cgltf_component_type_r_32f, /* FLOAT */ + cgltf_component_type_max_enum +} cgltf_component_type; + +typedef enum cgltf_type +{ + cgltf_type_invalid, + cgltf_type_scalar, + cgltf_type_vec2, + cgltf_type_vec3, + cgltf_type_vec4, + cgltf_type_mat2, + cgltf_type_mat3, + cgltf_type_mat4, + cgltf_type_max_enum +} cgltf_type; + +typedef enum cgltf_primitive_type +{ + cgltf_primitive_type_points, + cgltf_primitive_type_lines, + cgltf_primitive_type_line_loop, + cgltf_primitive_type_line_strip, + cgltf_primitive_type_triangles, + 
cgltf_primitive_type_triangle_strip, + cgltf_primitive_type_triangle_fan, + cgltf_primitive_type_max_enum +} cgltf_primitive_type; + +typedef enum cgltf_alpha_mode +{ + cgltf_alpha_mode_opaque, + cgltf_alpha_mode_mask, + cgltf_alpha_mode_blend, + cgltf_alpha_mode_max_enum +} cgltf_alpha_mode; + +typedef enum cgltf_animation_path_type { + cgltf_animation_path_type_invalid, + cgltf_animation_path_type_translation, + cgltf_animation_path_type_rotation, + cgltf_animation_path_type_scale, + cgltf_animation_path_type_weights, + cgltf_animation_path_type_max_enum +} cgltf_animation_path_type; + +typedef enum cgltf_interpolation_type { + cgltf_interpolation_type_linear, + cgltf_interpolation_type_step, + cgltf_interpolation_type_cubic_spline, + cgltf_interpolation_type_max_enum +} cgltf_interpolation_type; + +typedef enum cgltf_camera_type { + cgltf_camera_type_invalid, + cgltf_camera_type_perspective, + cgltf_camera_type_orthographic, + cgltf_camera_type_max_enum +} cgltf_camera_type; + +typedef enum cgltf_light_type { + cgltf_light_type_invalid, + cgltf_light_type_directional, + cgltf_light_type_point, + cgltf_light_type_spot, + cgltf_light_type_max_enum +} cgltf_light_type; + +typedef enum cgltf_data_free_method { + cgltf_data_free_method_none, + cgltf_data_free_method_file_release, + cgltf_data_free_method_memory_free, + cgltf_data_free_method_max_enum +} cgltf_data_free_method; + +typedef struct cgltf_extras { + cgltf_size start_offset; /* this field is deprecated and will be removed in the future; use data instead */ + cgltf_size end_offset; /* this field is deprecated and will be removed in the future; use data instead */ + + char* data; +} cgltf_extras; + +typedef struct cgltf_extension { + char* name; + char* data; +} cgltf_extension; + +typedef struct cgltf_buffer +{ + char* name; + cgltf_size size; + char* uri; + void* data; /* loaded by cgltf_load_buffers */ + cgltf_data_free_method data_free_method; + cgltf_extras extras; + cgltf_size extensions_count; + 
cgltf_extension* extensions; +} cgltf_buffer; + +typedef enum cgltf_meshopt_compression_mode { + cgltf_meshopt_compression_mode_invalid, + cgltf_meshopt_compression_mode_attributes, + cgltf_meshopt_compression_mode_triangles, + cgltf_meshopt_compression_mode_indices, + cgltf_meshopt_compression_mode_max_enum +} cgltf_meshopt_compression_mode; + +typedef enum cgltf_meshopt_compression_filter { + cgltf_meshopt_compression_filter_none, + cgltf_meshopt_compression_filter_octahedral, + cgltf_meshopt_compression_filter_quaternion, + cgltf_meshopt_compression_filter_exponential, + cgltf_meshopt_compression_filter_max_enum +} cgltf_meshopt_compression_filter; + +typedef struct cgltf_meshopt_compression +{ + cgltf_buffer* buffer; + cgltf_size offset; + cgltf_size size; + cgltf_size stride; + cgltf_size count; + cgltf_meshopt_compression_mode mode; + cgltf_meshopt_compression_filter filter; +} cgltf_meshopt_compression; + +typedef struct cgltf_buffer_view +{ + char *name; + cgltf_buffer* buffer; + cgltf_size offset; + cgltf_size size; + cgltf_size stride; /* 0 == automatically determined by accessor */ + cgltf_buffer_view_type type; + void* data; /* overrides buffer->data if present, filled by extensions */ + cgltf_bool has_meshopt_compression; + cgltf_meshopt_compression meshopt_compression; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_buffer_view; + +typedef struct cgltf_accessor_sparse +{ + cgltf_size count; + cgltf_buffer_view* indices_buffer_view; + cgltf_size indices_byte_offset; + cgltf_component_type indices_component_type; + cgltf_buffer_view* values_buffer_view; + cgltf_size values_byte_offset; + cgltf_extras extras; + cgltf_extras indices_extras; + cgltf_extras values_extras; + cgltf_size extensions_count; + cgltf_extension* extensions; + cgltf_size indices_extensions_count; + cgltf_extension* indices_extensions; + cgltf_size values_extensions_count; + cgltf_extension* values_extensions; +} cgltf_accessor_sparse; + 
+typedef struct cgltf_accessor +{ + char* name; + cgltf_component_type component_type; + cgltf_bool normalized; + cgltf_type type; + cgltf_size offset; + cgltf_size count; + cgltf_size stride; + cgltf_buffer_view* buffer_view; + cgltf_bool has_min; + cgltf_float min[16]; + cgltf_bool has_max; + cgltf_float max[16]; + cgltf_bool is_sparse; + cgltf_accessor_sparse sparse; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_accessor; + +typedef struct cgltf_attribute +{ + char* name; + cgltf_attribute_type type; + cgltf_int index; + cgltf_accessor* data; +} cgltf_attribute; + +typedef struct cgltf_image +{ + char* name; + char* uri; + cgltf_buffer_view* buffer_view; + char* mime_type; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_image; + +typedef struct cgltf_sampler +{ + char* name; + cgltf_int mag_filter; + cgltf_int min_filter; + cgltf_int wrap_s; + cgltf_int wrap_t; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_sampler; + +typedef struct cgltf_texture +{ + char* name; + cgltf_image* image; + cgltf_sampler* sampler; + cgltf_bool has_basisu; + cgltf_image* basisu_image; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_texture; + +typedef struct cgltf_texture_transform +{ + cgltf_float offset[2]; + cgltf_float rotation; + cgltf_float scale[2]; + cgltf_bool has_texcoord; + cgltf_int texcoord; +} cgltf_texture_transform; + +typedef struct cgltf_texture_view +{ + cgltf_texture* texture; + cgltf_int texcoord; + cgltf_float scale; /* equivalent to strength for occlusion_texture */ + cgltf_bool has_transform; + cgltf_texture_transform transform; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_texture_view; + +typedef struct cgltf_pbr_metallic_roughness +{ + cgltf_texture_view base_color_texture; + cgltf_texture_view metallic_roughness_texture; + + 
cgltf_float base_color_factor[4]; + cgltf_float metallic_factor; + cgltf_float roughness_factor; +} cgltf_pbr_metallic_roughness; + +typedef struct cgltf_pbr_specular_glossiness +{ + cgltf_texture_view diffuse_texture; + cgltf_texture_view specular_glossiness_texture; + + cgltf_float diffuse_factor[4]; + cgltf_float specular_factor[3]; + cgltf_float glossiness_factor; +} cgltf_pbr_specular_glossiness; + +typedef struct cgltf_clearcoat +{ + cgltf_texture_view clearcoat_texture; + cgltf_texture_view clearcoat_roughness_texture; + cgltf_texture_view clearcoat_normal_texture; + + cgltf_float clearcoat_factor; + cgltf_float clearcoat_roughness_factor; +} cgltf_clearcoat; + +typedef struct cgltf_transmission +{ + cgltf_texture_view transmission_texture; + cgltf_float transmission_factor; +} cgltf_transmission; + +typedef struct cgltf_ior +{ + cgltf_float ior; +} cgltf_ior; + +typedef struct cgltf_specular +{ + cgltf_texture_view specular_texture; + cgltf_texture_view specular_color_texture; + cgltf_float specular_color_factor[3]; + cgltf_float specular_factor; +} cgltf_specular; + +typedef struct cgltf_volume +{ + cgltf_texture_view thickness_texture; + cgltf_float thickness_factor; + cgltf_float attenuation_color[3]; + cgltf_float attenuation_distance; +} cgltf_volume; + +typedef struct cgltf_sheen +{ + cgltf_texture_view sheen_color_texture; + cgltf_float sheen_color_factor[3]; + cgltf_texture_view sheen_roughness_texture; + cgltf_float sheen_roughness_factor; +} cgltf_sheen; + +typedef struct cgltf_emissive_strength +{ + cgltf_float emissive_strength; +} cgltf_emissive_strength; + +typedef struct cgltf_iridescence +{ + cgltf_float iridescence_factor; + cgltf_texture_view iridescence_texture; + cgltf_float iridescence_ior; + cgltf_float iridescence_thickness_min; + cgltf_float iridescence_thickness_max; + cgltf_texture_view iridescence_thickness_texture; +} cgltf_iridescence; + +typedef struct cgltf_anisotropy +{ + cgltf_float anisotropy_strength; + cgltf_float 
anisotropy_rotation; + cgltf_texture_view anisotropy_texture; +} cgltf_anisotropy; + +typedef struct cgltf_material +{ + char* name; + cgltf_bool has_pbr_metallic_roughness; + cgltf_bool has_pbr_specular_glossiness; + cgltf_bool has_clearcoat; + cgltf_bool has_transmission; + cgltf_bool has_volume; + cgltf_bool has_ior; + cgltf_bool has_specular; + cgltf_bool has_sheen; + cgltf_bool has_emissive_strength; + cgltf_bool has_iridescence; + cgltf_bool has_anisotropy; + cgltf_pbr_metallic_roughness pbr_metallic_roughness; + cgltf_pbr_specular_glossiness pbr_specular_glossiness; + cgltf_clearcoat clearcoat; + cgltf_ior ior; + cgltf_specular specular; + cgltf_sheen sheen; + cgltf_transmission transmission; + cgltf_volume volume; + cgltf_emissive_strength emissive_strength; + cgltf_iridescence iridescence; + cgltf_anisotropy anisotropy; + cgltf_texture_view normal_texture; + cgltf_texture_view occlusion_texture; + cgltf_texture_view emissive_texture; + cgltf_float emissive_factor[3]; + cgltf_alpha_mode alpha_mode; + cgltf_float alpha_cutoff; + cgltf_bool double_sided; + cgltf_bool unlit; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_material; + +typedef struct cgltf_material_mapping +{ + cgltf_size variant; + cgltf_material* material; + cgltf_extras extras; +} cgltf_material_mapping; + +typedef struct cgltf_morph_target { + cgltf_attribute* attributes; + cgltf_size attributes_count; +} cgltf_morph_target; + +typedef struct cgltf_draco_mesh_compression { + cgltf_buffer_view* buffer_view; + cgltf_attribute* attributes; + cgltf_size attributes_count; +} cgltf_draco_mesh_compression; + +typedef struct cgltf_mesh_gpu_instancing { + cgltf_attribute* attributes; + cgltf_size attributes_count; +} cgltf_mesh_gpu_instancing; + +typedef struct cgltf_primitive { + cgltf_primitive_type type; + cgltf_accessor* indices; + cgltf_material* material; + cgltf_attribute* attributes; + cgltf_size attributes_count; + cgltf_morph_target* targets; + 
cgltf_size targets_count; + cgltf_extras extras; + cgltf_bool has_draco_mesh_compression; + cgltf_draco_mesh_compression draco_mesh_compression; + cgltf_material_mapping* mappings; + cgltf_size mappings_count; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_primitive; + +typedef struct cgltf_mesh { + char* name; + cgltf_primitive* primitives; + cgltf_size primitives_count; + cgltf_float* weights; + cgltf_size weights_count; + char** target_names; + cgltf_size target_names_count; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_mesh; + +typedef struct cgltf_node cgltf_node; + +typedef struct cgltf_skin { + char* name; + cgltf_node** joints; + cgltf_size joints_count; + cgltf_node* skeleton; + cgltf_accessor* inverse_bind_matrices; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_skin; + +typedef struct cgltf_camera_perspective { + cgltf_bool has_aspect_ratio; + cgltf_float aspect_ratio; + cgltf_float yfov; + cgltf_bool has_zfar; + cgltf_float zfar; + cgltf_float znear; + cgltf_extras extras; +} cgltf_camera_perspective; + +typedef struct cgltf_camera_orthographic { + cgltf_float xmag; + cgltf_float ymag; + cgltf_float zfar; + cgltf_float znear; + cgltf_extras extras; +} cgltf_camera_orthographic; + +typedef struct cgltf_camera { + char* name; + cgltf_camera_type type; + union { + cgltf_camera_perspective perspective; + cgltf_camera_orthographic orthographic; + } data; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_camera; + +typedef struct cgltf_light { + char* name; + cgltf_float color[3]; + cgltf_float intensity; + cgltf_light_type type; + cgltf_float range; + cgltf_float spot_inner_cone_angle; + cgltf_float spot_outer_cone_angle; + cgltf_extras extras; +} cgltf_light; + +struct cgltf_node { + char* name; + cgltf_node* parent; + cgltf_node** children; + cgltf_size children_count; + cgltf_skin* skin; + 
cgltf_mesh* mesh; + cgltf_camera* camera; + cgltf_light* light; + cgltf_float* weights; + cgltf_size weights_count; + cgltf_bool has_translation; + cgltf_bool has_rotation; + cgltf_bool has_scale; + cgltf_bool has_matrix; + cgltf_float translation[3]; + cgltf_float rotation[4]; + cgltf_float scale[3]; + cgltf_float matrix[16]; + cgltf_extras extras; + cgltf_bool has_mesh_gpu_instancing; + cgltf_mesh_gpu_instancing mesh_gpu_instancing; + cgltf_size extensions_count; + cgltf_extension* extensions; +}; + +typedef struct cgltf_scene { + char* name; + cgltf_node** nodes; + cgltf_size nodes_count; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_scene; + +typedef struct cgltf_animation_sampler { + cgltf_accessor* input; + cgltf_accessor* output; + cgltf_interpolation_type interpolation; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_animation_sampler; + +typedef struct cgltf_animation_channel { + cgltf_animation_sampler* sampler; + cgltf_node* target_node; + cgltf_animation_path_type target_path; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_animation_channel; + +typedef struct cgltf_animation { + char* name; + cgltf_animation_sampler* samplers; + cgltf_size samplers_count; + cgltf_animation_channel* channels; + cgltf_size channels_count; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_animation; + +typedef struct cgltf_material_variant +{ + char* name; + cgltf_extras extras; +} cgltf_material_variant; + +typedef struct cgltf_asset { + char* copyright; + char* generator; + char* version; + char* min_version; + cgltf_extras extras; + cgltf_size extensions_count; + cgltf_extension* extensions; +} cgltf_asset; + +typedef struct cgltf_data +{ + cgltf_file_type file_type; + void* file_data; + + cgltf_asset asset; + + cgltf_mesh* meshes; + cgltf_size meshes_count; + + cgltf_material* materials; 
+ cgltf_size materials_count; + + cgltf_accessor* accessors; + cgltf_size accessors_count; + + cgltf_buffer_view* buffer_views; + cgltf_size buffer_views_count; + + cgltf_buffer* buffers; + cgltf_size buffers_count; + + cgltf_image* images; + cgltf_size images_count; + + cgltf_texture* textures; + cgltf_size textures_count; + + cgltf_sampler* samplers; + cgltf_size samplers_count; + + cgltf_skin* skins; + cgltf_size skins_count; + + cgltf_camera* cameras; + cgltf_size cameras_count; + + cgltf_light* lights; + cgltf_size lights_count; + + cgltf_node* nodes; + cgltf_size nodes_count; + + cgltf_scene* scenes; + cgltf_size scenes_count; + + cgltf_scene* scene; + + cgltf_animation* animations; + cgltf_size animations_count; + + cgltf_material_variant* variants; + cgltf_size variants_count; + + cgltf_extras extras; + + cgltf_size data_extensions_count; + cgltf_extension* data_extensions; + + char** extensions_used; + cgltf_size extensions_used_count; + + char** extensions_required; + cgltf_size extensions_required_count; + + const char* json; + cgltf_size json_size; + + const void* bin; + cgltf_size bin_size; + + cgltf_memory_options memory; + cgltf_file_options file; +} cgltf_data; + +cgltf_result cgltf_parse( + const cgltf_options* options, + const void* data, + cgltf_size size, + cgltf_data** out_data); + +cgltf_result cgltf_parse_file( + const cgltf_options* options, + const char* path, + cgltf_data** out_data); + +cgltf_result cgltf_load_buffers( + const cgltf_options* options, + cgltf_data* data, + const char* gltf_path); + +cgltf_result cgltf_load_buffer_base64(const cgltf_options* options, cgltf_size size, const char* base64, void** out_data); + +cgltf_size cgltf_decode_string(char* string); +cgltf_size cgltf_decode_uri(char* uri); + +cgltf_result cgltf_validate(cgltf_data* data); + +void cgltf_free(cgltf_data* data); + +void cgltf_node_transform_local(const cgltf_node* node, cgltf_float* out_matrix); +void cgltf_node_transform_world(const cgltf_node* node, 
cgltf_float* out_matrix); + +const uint8_t* cgltf_buffer_view_data(const cgltf_buffer_view* view); + +cgltf_bool cgltf_accessor_read_float(const cgltf_accessor* accessor, cgltf_size index, cgltf_float* out, cgltf_size element_size); +cgltf_bool cgltf_accessor_read_uint(const cgltf_accessor* accessor, cgltf_size index, cgltf_uint* out, cgltf_size element_size); +cgltf_size cgltf_accessor_read_index(const cgltf_accessor* accessor, cgltf_size index); + +cgltf_size cgltf_num_components(cgltf_type type); +cgltf_size cgltf_component_size(cgltf_component_type component_type); +cgltf_size cgltf_calc_size(cgltf_type type, cgltf_component_type component_type); + +cgltf_size cgltf_accessor_unpack_floats(const cgltf_accessor* accessor, cgltf_float* out, cgltf_size float_count); +cgltf_size cgltf_accessor_unpack_indices(const cgltf_accessor* accessor, cgltf_uint* out, cgltf_size index_count); + +/* this function is deprecated and will be removed in the future; use cgltf_extras::data instead */ +cgltf_result cgltf_copy_extras_json(const cgltf_data* data, const cgltf_extras* extras, char* dest, cgltf_size* dest_size); + +cgltf_size cgltf_mesh_index(const cgltf_data* data, const cgltf_mesh* object); +cgltf_size cgltf_material_index(const cgltf_data* data, const cgltf_material* object); +cgltf_size cgltf_accessor_index(const cgltf_data* data, const cgltf_accessor* object); +cgltf_size cgltf_buffer_view_index(const cgltf_data* data, const cgltf_buffer_view* object); +cgltf_size cgltf_buffer_index(const cgltf_data* data, const cgltf_buffer* object); +cgltf_size cgltf_image_index(const cgltf_data* data, const cgltf_image* object); +cgltf_size cgltf_texture_index(const cgltf_data* data, const cgltf_texture* object); +cgltf_size cgltf_sampler_index(const cgltf_data* data, const cgltf_sampler* object); +cgltf_size cgltf_skin_index(const cgltf_data* data, const cgltf_skin* object); +cgltf_size cgltf_camera_index(const cgltf_data* data, const cgltf_camera* object); +cgltf_size 
cgltf_light_index(const cgltf_data* data, const cgltf_light* object); +cgltf_size cgltf_node_index(const cgltf_data* data, const cgltf_node* object); +cgltf_size cgltf_scene_index(const cgltf_data* data, const cgltf_scene* object); +cgltf_size cgltf_animation_index(const cgltf_data* data, const cgltf_animation* object); +cgltf_size cgltf_animation_sampler_index(const cgltf_animation* animation, const cgltf_animation_sampler* object); +cgltf_size cgltf_animation_channel_index(const cgltf_animation* animation, const cgltf_animation_channel* object); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef CGLTF_H_INCLUDED__ */ + +/* + * + * Stop now, if you are only interested in the API. + * Below, you find the implementation. + * + */ + +#if defined(__INTELLISENSE__) || defined(__JETBRAINS_IDE__) +/* This makes MSVC/CLion intellisense work. */ +#define CGLTF_IMPLEMENTATION +#endif + +#ifdef CGLTF_IMPLEMENTATION + +#include /* For assert */ +#include /* For strncpy */ +#include /* For fopen */ +#include /* For UINT_MAX etc */ +#include /* For FLT_MAX */ + +#if !defined(CGLTF_MALLOC) || !defined(CGLTF_FREE) || !defined(CGLTF_ATOI) || !defined(CGLTF_ATOF) || !defined(CGLTF_ATOLL) +#include /* For malloc, free, atoi, atof */ +#endif + +/* JSMN_PARENT_LINKS is necessary to make parsing large structures linear in input size */ +#define JSMN_PARENT_LINKS + +/* JSMN_STRICT is necessary to reject invalid JSON documents */ +#define JSMN_STRICT + +/* + * -- jsmn.h start -- + * Source: https://github.com/zserge/jsmn + * License: MIT + */ +typedef enum { + JSMN_UNDEFINED = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3, + JSMN_PRIMITIVE = 4 +} jsmntype_t; +enum jsmnerr { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3 +}; +typedef struct { + jsmntype_t type; + ptrdiff_t start; + ptrdiff_t end; + 
int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; +typedef struct { + size_t pos; /* offset in the JSON string */ + unsigned int toknext; /* next token to allocate */ + int toksuper; /* superior token node, e.g parent object or array */ +} jsmn_parser; +static void jsmn_init(jsmn_parser *parser); +static int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, size_t num_tokens); +/* + * -- jsmn.h end -- + */ + + +#ifndef CGLTF_CONSTS +static const cgltf_size GlbHeaderSize = 12; +static const cgltf_size GlbChunkHeaderSize = 8; +static const uint32_t GlbVersion = 2; +static const uint32_t GlbMagic = 0x46546C67; +static const uint32_t GlbMagicJsonChunk = 0x4E4F534A; +static const uint32_t GlbMagicBinChunk = 0x004E4942; +#define CGLTF_CONSTS +#endif + +#ifndef CGLTF_MALLOC +#define CGLTF_MALLOC(size) malloc(size) +#endif +#ifndef CGLTF_FREE +#define CGLTF_FREE(ptr) free(ptr) +#endif +#ifndef CGLTF_ATOI +#define CGLTF_ATOI(str) atoi(str) +#endif +#ifndef CGLTF_ATOF +#define CGLTF_ATOF(str) atof(str) +#endif +#ifndef CGLTF_ATOLL +#define CGLTF_ATOLL(str) atoll(str) +#endif +#ifndef CGLTF_VALIDATE_ENABLE_ASSERTS +#define CGLTF_VALIDATE_ENABLE_ASSERTS 0 +#endif + +static void* cgltf_default_alloc(void* user, cgltf_size size) +{ + (void)user; + return CGLTF_MALLOC(size); +} + +static void cgltf_default_free(void* user, void* ptr) +{ + (void)user; + CGLTF_FREE(ptr); +} + +static void* cgltf_calloc(cgltf_options* options, size_t element_size, cgltf_size count) +{ + if (SIZE_MAX / element_size < count) + { + return NULL; + } + void* result = options->memory.alloc_func(options->memory.user_data, element_size * count); + if (!result) + { + return NULL; + } + memset(result, 0, element_size * count); + return result; +} + +static cgltf_result cgltf_default_file_read(const struct cgltf_memory_options* memory_options, const struct cgltf_file_options* file_options, const char* path, cgltf_size* size, void** data) +{ + 
(void)file_options; + void* (*memory_alloc)(void*, cgltf_size) = memory_options->alloc_func ? memory_options->alloc_func : &cgltf_default_alloc; + void (*memory_free)(void*, void*) = memory_options->free_func ? memory_options->free_func : &cgltf_default_free; + + FILE* file = fopen(path, "rb"); + if (!file) + { + return cgltf_result_file_not_found; + } + + cgltf_size file_size = size ? *size : 0; + + if (file_size == 0) + { + fseek(file, 0, SEEK_END); + +#ifdef _MSC_VER + __int64 length = _ftelli64(file); +#else + long length = ftell(file); +#endif + + if (length < 0) + { + fclose(file); + return cgltf_result_io_error; + } + + fseek(file, 0, SEEK_SET); + file_size = (cgltf_size)length; + } + + char* file_data = (char*)memory_alloc(memory_options->user_data, file_size); + if (!file_data) + { + fclose(file); + return cgltf_result_out_of_memory; + } + + cgltf_size read_size = fread(file_data, 1, file_size, file); + + fclose(file); + + if (read_size != file_size) + { + memory_free(memory_options->user_data, file_data); + return cgltf_result_io_error; + } + + if (size) + { + *size = file_size; + } + if (data) + { + *data = file_data; + } + + return cgltf_result_success; +} + +static void cgltf_default_file_release(const struct cgltf_memory_options* memory_options, const struct cgltf_file_options* file_options, void* data) +{ + (void)file_options; + void (*memfree)(void*, void*) = memory_options->free_func ? 
memory_options->free_func : &cgltf_default_free; + memfree(memory_options->user_data, data); +} + +static cgltf_result cgltf_parse_json(cgltf_options* options, const uint8_t* json_chunk, cgltf_size size, cgltf_data** out_data); + +cgltf_result cgltf_parse(const cgltf_options* options, const void* data, cgltf_size size, cgltf_data** out_data) +{ + if (size < GlbHeaderSize) + { + return cgltf_result_data_too_short; + } + + if (options == NULL) + { + return cgltf_result_invalid_options; + } + + cgltf_options fixed_options = *options; + if (fixed_options.memory.alloc_func == NULL) + { + fixed_options.memory.alloc_func = &cgltf_default_alloc; + } + if (fixed_options.memory.free_func == NULL) + { + fixed_options.memory.free_func = &cgltf_default_free; + } + + uint32_t tmp; + // Magic + memcpy(&tmp, data, 4); + if (tmp != GlbMagic) + { + if (fixed_options.type == cgltf_file_type_invalid) + { + fixed_options.type = cgltf_file_type_gltf; + } + else if (fixed_options.type == cgltf_file_type_glb) + { + return cgltf_result_unknown_format; + } + } + + if (fixed_options.type == cgltf_file_type_gltf) + { + cgltf_result json_result = cgltf_parse_json(&fixed_options, (const uint8_t*)data, size, out_data); + if (json_result != cgltf_result_success) + { + return json_result; + } + + (*out_data)->file_type = cgltf_file_type_gltf; + + return cgltf_result_success; + } + + const uint8_t* ptr = (const uint8_t*)data; + // Version + memcpy(&tmp, ptr + 4, 4); + uint32_t version = tmp; + if (version != GlbVersion) + { + return version < GlbVersion ? 
cgltf_result_legacy_gltf : cgltf_result_unknown_format; + } + + // Total length + memcpy(&tmp, ptr + 8, 4); + if (tmp > size) + { + return cgltf_result_data_too_short; + } + + const uint8_t* json_chunk = ptr + GlbHeaderSize; + + if (GlbHeaderSize + GlbChunkHeaderSize > size) + { + return cgltf_result_data_too_short; + } + + // JSON chunk: length + uint32_t json_length; + memcpy(&json_length, json_chunk, 4); + if (GlbHeaderSize + GlbChunkHeaderSize + json_length > size) + { + return cgltf_result_data_too_short; + } + + // JSON chunk: magic + memcpy(&tmp, json_chunk + 4, 4); + if (tmp != GlbMagicJsonChunk) + { + return cgltf_result_unknown_format; + } + + json_chunk += GlbChunkHeaderSize; + + const void* bin = 0; + cgltf_size bin_size = 0; + + if (GlbHeaderSize + GlbChunkHeaderSize + json_length + GlbChunkHeaderSize <= size) + { + // We can read another chunk + const uint8_t* bin_chunk = json_chunk + json_length; + + // Bin chunk: length + uint32_t bin_length; + memcpy(&bin_length, bin_chunk, 4); + if (GlbHeaderSize + GlbChunkHeaderSize + json_length + GlbChunkHeaderSize + bin_length > size) + { + return cgltf_result_data_too_short; + } + + // Bin chunk: magic + memcpy(&tmp, bin_chunk + 4, 4); + if (tmp != GlbMagicBinChunk) + { + return cgltf_result_unknown_format; + } + + bin_chunk += GlbChunkHeaderSize; + + bin = bin_chunk; + bin_size = bin_length; + } + + cgltf_result json_result = cgltf_parse_json(&fixed_options, json_chunk, json_length, out_data); + if (json_result != cgltf_result_success) + { + return json_result; + } + + (*out_data)->file_type = cgltf_file_type_glb; + (*out_data)->bin = bin; + (*out_data)->bin_size = bin_size; + + return cgltf_result_success; +} + +cgltf_result cgltf_parse_file(const cgltf_options* options, const char* path, cgltf_data** out_data) +{ + if (options == NULL) + { + return cgltf_result_invalid_options; + } + + cgltf_result (*file_read)(const struct cgltf_memory_options*, const struct cgltf_file_options*, const char*, cgltf_size*, 
void**) = options->file.read ? options->file.read : &cgltf_default_file_read; + void (*file_release)(const struct cgltf_memory_options*, const struct cgltf_file_options*, void* data) = options->file.release ? options->file.release : cgltf_default_file_release; + + void* file_data = NULL; + cgltf_size file_size = 0; + cgltf_result result = file_read(&options->memory, &options->file, path, &file_size, &file_data); + if (result != cgltf_result_success) + { + return result; + } + + result = cgltf_parse(options, file_data, file_size, out_data); + + if (result != cgltf_result_success) + { + file_release(&options->memory, &options->file, file_data); + return result; + } + + (*out_data)->file_data = file_data; + + return cgltf_result_success; +} + +static void cgltf_combine_paths(char* path, const char* base, const char* uri) +{ + const char* s0 = strrchr(base, '/'); + const char* s1 = strrchr(base, '\\'); + const char* slash = s0 ? (s1 && s1 > s0 ? s1 : s0) : s1; + + if (slash) + { + size_t prefix = slash - base + 1; + + strncpy(path, base, prefix); + strcpy(path + prefix, uri); + } + else + { + strcpy(path, uri); + } +} + +static cgltf_result cgltf_load_buffer_file(const cgltf_options* options, cgltf_size size, const char* uri, const char* gltf_path, void** out_data) +{ + void* (*memory_alloc)(void*, cgltf_size) = options->memory.alloc_func ? options->memory.alloc_func : &cgltf_default_alloc; + void (*memory_free)(void*, void*) = options->memory.free_func ? options->memory.free_func : &cgltf_default_free; + cgltf_result (*file_read)(const struct cgltf_memory_options*, const struct cgltf_file_options*, const char*, cgltf_size*, void**) = options->file.read ? 
options->file.read : &cgltf_default_file_read; + + char* path = (char*)memory_alloc(options->memory.user_data, strlen(uri) + strlen(gltf_path) + 1); + if (!path) + { + return cgltf_result_out_of_memory; + } + + cgltf_combine_paths(path, gltf_path, uri); + + // after combining, the tail of the resulting path is a uri; decode_uri converts it into path + cgltf_decode_uri(path + strlen(path) - strlen(uri)); + + void* file_data = NULL; + cgltf_result result = file_read(&options->memory, &options->file, path, &size, &file_data); + + memory_free(options->memory.user_data, path); + + *out_data = (result == cgltf_result_success) ? file_data : NULL; + + return result; +} + +cgltf_result cgltf_load_buffer_base64(const cgltf_options* options, cgltf_size size, const char* base64, void** out_data) +{ + void* (*memory_alloc)(void*, cgltf_size) = options->memory.alloc_func ? options->memory.alloc_func : &cgltf_default_alloc; + void (*memory_free)(void*, void*) = options->memory.free_func ? options->memory.free_func : &cgltf_default_free; + + unsigned char* data = (unsigned char*)memory_alloc(options->memory.user_data, size); + if (!data) + { + return cgltf_result_out_of_memory; + } + + unsigned int buffer = 0; + unsigned int buffer_bits = 0; + + for (cgltf_size i = 0; i < size; ++i) + { + while (buffer_bits < 8) + { + char ch = *base64++; + + int index = + (unsigned)(ch - 'A') < 26 ? (ch - 'A') : + (unsigned)(ch - 'a') < 26 ? (ch - 'a') + 26 : + (unsigned)(ch - '0') < 10 ? (ch - '0') + 52 : + ch == '+' ? 62 : + ch == '/' ? 63 : + -1; + + if (index < 0) + { + memory_free(options->memory.user_data, data); + return cgltf_result_io_error; + } + + buffer = (buffer << 6) | index; + buffer_bits += 6; + } + + data[i] = (unsigned char)(buffer >> (buffer_bits - 8)); + buffer_bits -= 8; + } + + *out_data = data; + + return cgltf_result_success; +} + +static int cgltf_unhex(char ch) +{ + return + (unsigned)(ch - '0') < 10 ? (ch - '0') : + (unsigned)(ch - 'A') < 6 ? 
(ch - 'A') + 10 : + (unsigned)(ch - 'a') < 6 ? (ch - 'a') + 10 : + -1; +} + +cgltf_size cgltf_decode_string(char* string) +{ + char* read = string + strcspn(string, "\\"); + if (*read == 0) + { + return read - string; + } + char* write = string; + char* last = string; + + for (;;) + { + // Copy characters since last escaped sequence + cgltf_size written = read - last; + memmove(write, last, written); + write += written; + + if (*read++ == 0) + { + break; + } + + // jsmn already checked that all escape sequences are valid + switch (*read++) + { + case '\"': *write++ = '\"'; break; + case '/': *write++ = '/'; break; + case '\\': *write++ = '\\'; break; + case 'b': *write++ = '\b'; break; + case 'f': *write++ = '\f'; break; + case 'r': *write++ = '\r'; break; + case 'n': *write++ = '\n'; break; + case 't': *write++ = '\t'; break; + case 'u': + { + // UCS-2 codepoint \uXXXX to UTF-8 + int character = 0; + for (cgltf_size i = 0; i < 4; ++i) + { + character = (character << 4) + cgltf_unhex(*read++); + } + + if (character <= 0x7F) + { + *write++ = character & 0xFF; + } + else if (character <= 0x7FF) + { + *write++ = 0xC0 | ((character >> 6) & 0xFF); + *write++ = 0x80 | (character & 0x3F); + } + else + { + *write++ = 0xE0 | ((character >> 12) & 0xFF); + *write++ = 0x80 | ((character >> 6) & 0x3F); + *write++ = 0x80 | (character & 0x3F); + } + break; + } + default: + break; + } + + last = read; + read += strcspn(read, "\\"); + } + + *write = 0; + return write - string; +} + +cgltf_size cgltf_decode_uri(char* uri) +{ + char* write = uri; + char* i = uri; + + while (*i) + { + if (*i == '%') + { + int ch1 = cgltf_unhex(i[1]); + + if (ch1 >= 0) + { + int ch2 = cgltf_unhex(i[2]); + + if (ch2 >= 0) + { + *write++ = (char)(ch1 * 16 + ch2); + i += 3; + continue; + } + } + } + + *write++ = *i++; + } + + *write = 0; + return write - uri; +} + +cgltf_result cgltf_load_buffers(const cgltf_options* options, cgltf_data* data, const char* gltf_path) +{ + if (options == NULL) + { + return 
cgltf_result_invalid_options; + } + + if (data->buffers_count && data->buffers[0].data == NULL && data->buffers[0].uri == NULL && data->bin) + { + if (data->bin_size < data->buffers[0].size) + { + return cgltf_result_data_too_short; + } + + data->buffers[0].data = (void*)data->bin; + data->buffers[0].data_free_method = cgltf_data_free_method_none; + } + + for (cgltf_size i = 0; i < data->buffers_count; ++i) + { + if (data->buffers[i].data) + { + continue; + } + + const char* uri = data->buffers[i].uri; + + if (uri == NULL) + { + continue; + } + + if (strncmp(uri, "data:", 5) == 0) + { + const char* comma = strchr(uri, ','); + + if (comma && comma - uri >= 7 && strncmp(comma - 7, ";base64", 7) == 0) + { + cgltf_result res = cgltf_load_buffer_base64(options, data->buffers[i].size, comma + 1, &data->buffers[i].data); + data->buffers[i].data_free_method = cgltf_data_free_method_memory_free; + + if (res != cgltf_result_success) + { + return res; + } + } + else + { + return cgltf_result_unknown_format; + } + } + else if (strstr(uri, "://") == NULL && gltf_path) + { + cgltf_result res = cgltf_load_buffer_file(options, data->buffers[i].size, uri, gltf_path, &data->buffers[i].data); + data->buffers[i].data_free_method = cgltf_data_free_method_file_release; + + if (res != cgltf_result_success) + { + return res; + } + } + else + { + return cgltf_result_unknown_format; + } + } + + return cgltf_result_success; +} + +static cgltf_size cgltf_calc_index_bound(cgltf_buffer_view* buffer_view, cgltf_size offset, cgltf_component_type component_type, cgltf_size count) +{ + char* data = (char*)buffer_view->buffer->data + offset + buffer_view->offset; + cgltf_size bound = 0; + + switch (component_type) + { + case cgltf_component_type_r_8u: + for (size_t i = 0; i < count; ++i) + { + cgltf_size v = ((unsigned char*)data)[i]; + bound = bound > v ? 
bound : v; + } + break; + + case cgltf_component_type_r_16u: + for (size_t i = 0; i < count; ++i) + { + cgltf_size v = ((unsigned short*)data)[i]; + bound = bound > v ? bound : v; + } + break; + + case cgltf_component_type_r_32u: + for (size_t i = 0; i < count; ++i) + { + cgltf_size v = ((unsigned int*)data)[i]; + bound = bound > v ? bound : v; + } + break; + + default: + ; + } + + return bound; +} + +#if CGLTF_VALIDATE_ENABLE_ASSERTS +#define CGLTF_ASSERT_IF(cond, result) assert(!(cond)); if (cond) return result; +#else +#define CGLTF_ASSERT_IF(cond, result) if (cond) return result; +#endif + +cgltf_result cgltf_validate(cgltf_data* data) +{ + for (cgltf_size i = 0; i < data->accessors_count; ++i) + { + cgltf_accessor* accessor = &data->accessors[i]; + + cgltf_size element_size = cgltf_calc_size(accessor->type, accessor->component_type); + + if (accessor->buffer_view) + { + cgltf_size req_size = accessor->offset + accessor->stride * (accessor->count - 1) + element_size; + + CGLTF_ASSERT_IF(accessor->buffer_view->size < req_size, cgltf_result_data_too_short); + } + + if (accessor->is_sparse) + { + cgltf_accessor_sparse* sparse = &accessor->sparse; + + cgltf_size indices_component_size = cgltf_calc_size(cgltf_type_scalar, sparse->indices_component_type); + cgltf_size indices_req_size = sparse->indices_byte_offset + indices_component_size * sparse->count; + cgltf_size values_req_size = sparse->values_byte_offset + element_size * sparse->count; + + CGLTF_ASSERT_IF(sparse->indices_buffer_view->size < indices_req_size || + sparse->values_buffer_view->size < values_req_size, cgltf_result_data_too_short); + + CGLTF_ASSERT_IF(sparse->indices_component_type != cgltf_component_type_r_8u && + sparse->indices_component_type != cgltf_component_type_r_16u && + sparse->indices_component_type != cgltf_component_type_r_32u, cgltf_result_invalid_gltf); + + if (sparse->indices_buffer_view->buffer->data) + { + cgltf_size index_bound = 
cgltf_calc_index_bound(sparse->indices_buffer_view, sparse->indices_byte_offset, sparse->indices_component_type, sparse->count); + + CGLTF_ASSERT_IF(index_bound >= accessor->count, cgltf_result_data_too_short); + } + } + } + + for (cgltf_size i = 0; i < data->buffer_views_count; ++i) + { + cgltf_size req_size = data->buffer_views[i].offset + data->buffer_views[i].size; + + CGLTF_ASSERT_IF(data->buffer_views[i].buffer && data->buffer_views[i].buffer->size < req_size, cgltf_result_data_too_short); + + if (data->buffer_views[i].has_meshopt_compression) + { + cgltf_meshopt_compression* mc = &data->buffer_views[i].meshopt_compression; + + CGLTF_ASSERT_IF(mc->buffer == NULL || mc->buffer->size < mc->offset + mc->size, cgltf_result_data_too_short); + + CGLTF_ASSERT_IF(data->buffer_views[i].stride && mc->stride != data->buffer_views[i].stride, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(data->buffer_views[i].size != mc->stride * mc->count, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(mc->mode == cgltf_meshopt_compression_mode_invalid, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(mc->mode == cgltf_meshopt_compression_mode_attributes && !(mc->stride % 4 == 0 && mc->stride <= 256), cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(mc->mode == cgltf_meshopt_compression_mode_triangles && mc->count % 3 != 0, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF((mc->mode == cgltf_meshopt_compression_mode_triangles || mc->mode == cgltf_meshopt_compression_mode_indices) && mc->stride != 2 && mc->stride != 4, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF((mc->mode == cgltf_meshopt_compression_mode_triangles || mc->mode == cgltf_meshopt_compression_mode_indices) && mc->filter != cgltf_meshopt_compression_filter_none, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(mc->filter == cgltf_meshopt_compression_filter_octahedral && mc->stride != 4 && mc->stride != 8, cgltf_result_invalid_gltf); + + CGLTF_ASSERT_IF(mc->filter == cgltf_meshopt_compression_filter_quaternion && mc->stride 
!= 8, cgltf_result_invalid_gltf); + } + } + + for (cgltf_size i = 0; i < data->meshes_count; ++i) + { + if (data->meshes[i].weights) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives_count && data->meshes[i].primitives[0].targets_count != data->meshes[i].weights_count, cgltf_result_invalid_gltf); + } + + if (data->meshes[i].target_names) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives_count && data->meshes[i].primitives[0].targets_count != data->meshes[i].target_names_count, cgltf_result_invalid_gltf); + } + + for (cgltf_size j = 0; j < data->meshes[i].primitives_count; ++j) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives[j].targets_count != data->meshes[i].primitives[0].targets_count, cgltf_result_invalid_gltf); + + if (data->meshes[i].primitives[j].attributes_count) + { + cgltf_accessor* first = data->meshes[i].primitives[j].attributes[0].data; + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].attributes_count; ++k) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives[j].attributes[k].data->count != first->count, cgltf_result_invalid_gltf); + } + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].targets_count; ++k) + { + for (cgltf_size m = 0; m < data->meshes[i].primitives[j].targets[k].attributes_count; ++m) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives[j].targets[k].attributes[m].data->count != first->count, cgltf_result_invalid_gltf); + } + } + + cgltf_accessor* indices = data->meshes[i].primitives[j].indices; + + CGLTF_ASSERT_IF(indices && + indices->component_type != cgltf_component_type_r_8u && + indices->component_type != cgltf_component_type_r_16u && + indices->component_type != cgltf_component_type_r_32u, cgltf_result_invalid_gltf); + + if (indices && indices->buffer_view && indices->buffer_view->buffer->data) + { + cgltf_size index_bound = cgltf_calc_index_bound(indices->buffer_view, indices->offset, indices->component_type, indices->count); + + CGLTF_ASSERT_IF(index_bound >= first->count, cgltf_result_data_too_short); + } + + for 
(cgltf_size k = 0; k < data->meshes[i].primitives[j].mappings_count; ++k) + { + CGLTF_ASSERT_IF(data->meshes[i].primitives[j].mappings[k].variant >= data->variants_count, cgltf_result_invalid_gltf); + } + } + } + } + + for (cgltf_size i = 0; i < data->nodes_count; ++i) + { + if (data->nodes[i].weights && data->nodes[i].mesh) + { + CGLTF_ASSERT_IF (data->nodes[i].mesh->primitives_count && data->nodes[i].mesh->primitives[0].targets_count != data->nodes[i].weights_count, cgltf_result_invalid_gltf); + } + } + + for (cgltf_size i = 0; i < data->nodes_count; ++i) + { + cgltf_node* p1 = data->nodes[i].parent; + cgltf_node* p2 = p1 ? p1->parent : NULL; + + while (p1 && p2) + { + CGLTF_ASSERT_IF(p1 == p2, cgltf_result_invalid_gltf); + + p1 = p1->parent; + p2 = p2->parent ? p2->parent->parent : NULL; + } + } + + for (cgltf_size i = 0; i < data->scenes_count; ++i) + { + for (cgltf_size j = 0; j < data->scenes[i].nodes_count; ++j) + { + CGLTF_ASSERT_IF(data->scenes[i].nodes[j]->parent, cgltf_result_invalid_gltf); + } + } + + for (cgltf_size i = 0; i < data->animations_count; ++i) + { + for (cgltf_size j = 0; j < data->animations[i].channels_count; ++j) + { + cgltf_animation_channel* channel = &data->animations[i].channels[j]; + + if (!channel->target_node) + { + continue; + } + + cgltf_size components = 1; + + if (channel->target_path == cgltf_animation_path_type_weights) + { + CGLTF_ASSERT_IF(!channel->target_node->mesh || !channel->target_node->mesh->primitives_count, cgltf_result_invalid_gltf); + + components = channel->target_node->mesh->primitives[0].targets_count; + } + + cgltf_size values = channel->sampler->interpolation == cgltf_interpolation_type_cubic_spline ? 
3 : 1; + + CGLTF_ASSERT_IF(channel->sampler->input->count * components * values != channel->sampler->output->count, cgltf_result_data_too_short); + } + } + + return cgltf_result_success; +} + +cgltf_result cgltf_copy_extras_json(const cgltf_data* data, const cgltf_extras* extras, char* dest, cgltf_size* dest_size) +{ + cgltf_size json_size = extras->end_offset - extras->start_offset; + + if (!dest) + { + if (dest_size) + { + *dest_size = json_size + 1; + return cgltf_result_success; + } + return cgltf_result_invalid_options; + } + + if (*dest_size + 1 < json_size) + { + strncpy(dest, data->json + extras->start_offset, *dest_size - 1); + dest[*dest_size - 1] = 0; + } + else + { + strncpy(dest, data->json + extras->start_offset, json_size); + dest[json_size] = 0; + } + + return cgltf_result_success; +} + +static void cgltf_free_extras(cgltf_data* data, cgltf_extras* extras) +{ + data->memory.free_func(data->memory.user_data, extras->data); +} + +static void cgltf_free_extensions(cgltf_data* data, cgltf_extension* extensions, cgltf_size extensions_count) +{ + for (cgltf_size i = 0; i < extensions_count; ++i) + { + data->memory.free_func(data->memory.user_data, extensions[i].name); + data->memory.free_func(data->memory.user_data, extensions[i].data); + } + data->memory.free_func(data->memory.user_data, extensions); +} + +static void cgltf_free_texture_view(cgltf_data* data, cgltf_texture_view* view) +{ + cgltf_free_extensions(data, view->extensions, view->extensions_count); + cgltf_free_extras(data, &view->extras); +} + +void cgltf_free(cgltf_data* data) +{ + if (!data) + { + return; + } + + void (*file_release)(const struct cgltf_memory_options*, const struct cgltf_file_options*, void* data) = data->file.release ? 
data->file.release : cgltf_default_file_release; + + data->memory.free_func(data->memory.user_data, data->asset.copyright); + data->memory.free_func(data->memory.user_data, data->asset.generator); + data->memory.free_func(data->memory.user_data, data->asset.version); + data->memory.free_func(data->memory.user_data, data->asset.min_version); + + cgltf_free_extensions(data, data->asset.extensions, data->asset.extensions_count); + cgltf_free_extras(data, &data->asset.extras); + + for (cgltf_size i = 0; i < data->accessors_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->accessors[i].name); + + if(data->accessors[i].is_sparse) + { + cgltf_free_extensions(data, data->accessors[i].sparse.extensions, data->accessors[i].sparse.extensions_count); + cgltf_free_extensions(data, data->accessors[i].sparse.indices_extensions, data->accessors[i].sparse.indices_extensions_count); + cgltf_free_extensions(data, data->accessors[i].sparse.values_extensions, data->accessors[i].sparse.values_extensions_count); + cgltf_free_extras(data, &data->accessors[i].sparse.extras); + cgltf_free_extras(data, &data->accessors[i].sparse.indices_extras); + cgltf_free_extras(data, &data->accessors[i].sparse.values_extras); + } + cgltf_free_extensions(data, data->accessors[i].extensions, data->accessors[i].extensions_count); + cgltf_free_extras(data, &data->accessors[i].extras); + } + data->memory.free_func(data->memory.user_data, data->accessors); + + for (cgltf_size i = 0; i < data->buffer_views_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->buffer_views[i].name); + data->memory.free_func(data->memory.user_data, data->buffer_views[i].data); + + cgltf_free_extensions(data, data->buffer_views[i].extensions, data->buffer_views[i].extensions_count); + cgltf_free_extras(data, &data->buffer_views[i].extras); + } + data->memory.free_func(data->memory.user_data, data->buffer_views); + + for (cgltf_size i = 0; i < data->buffers_count; ++i) + { + 
data->memory.free_func(data->memory.user_data, data->buffers[i].name); + + if (data->buffers[i].data_free_method == cgltf_data_free_method_file_release) + { + file_release(&data->memory, &data->file, data->buffers[i].data); + } + else if (data->buffers[i].data_free_method == cgltf_data_free_method_memory_free) + { + data->memory.free_func(data->memory.user_data, data->buffers[i].data); + } + + data->memory.free_func(data->memory.user_data, data->buffers[i].uri); + + cgltf_free_extensions(data, data->buffers[i].extensions, data->buffers[i].extensions_count); + cgltf_free_extras(data, &data->buffers[i].extras); + } + data->memory.free_func(data->memory.user_data, data->buffers); + + for (cgltf_size i = 0; i < data->meshes_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->meshes[i].name); + + for (cgltf_size j = 0; j < data->meshes[i].primitives_count; ++j) + { + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].attributes_count; ++k) + { + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].attributes[k].name); + } + + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].attributes); + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].targets_count; ++k) + { + for (cgltf_size m = 0; m < data->meshes[i].primitives[j].targets[k].attributes_count; ++m) + { + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].targets[k].attributes[m].name); + } + + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].targets[k].attributes); + } + + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].targets); + + if (data->meshes[i].primitives[j].has_draco_mesh_compression) + { + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].draco_mesh_compression.attributes_count; ++k) + { + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].draco_mesh_compression.attributes[k].name); + } + + 
data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].draco_mesh_compression.attributes); + } + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].mappings_count; ++k) + { + cgltf_free_extras(data, &data->meshes[i].primitives[j].mappings[k].extras); + } + + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives[j].mappings); + + cgltf_free_extensions(data, data->meshes[i].primitives[j].extensions, data->meshes[i].primitives[j].extensions_count); + cgltf_free_extras(data, &data->meshes[i].primitives[j].extras); + } + + data->memory.free_func(data->memory.user_data, data->meshes[i].primitives); + data->memory.free_func(data->memory.user_data, data->meshes[i].weights); + + for (cgltf_size j = 0; j < data->meshes[i].target_names_count; ++j) + { + data->memory.free_func(data->memory.user_data, data->meshes[i].target_names[j]); + } + + cgltf_free_extensions(data, data->meshes[i].extensions, data->meshes[i].extensions_count); + cgltf_free_extras(data, &data->meshes[i].extras); + + data->memory.free_func(data->memory.user_data, data->meshes[i].target_names); + } + + data->memory.free_func(data->memory.user_data, data->meshes); + + for (cgltf_size i = 0; i < data->materials_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->materials[i].name); + + if(data->materials[i].has_pbr_metallic_roughness) + { + cgltf_free_texture_view(data, &data->materials[i].pbr_metallic_roughness.metallic_roughness_texture); + cgltf_free_texture_view(data, &data->materials[i].pbr_metallic_roughness.base_color_texture); + } + if(data->materials[i].has_pbr_specular_glossiness) + { + cgltf_free_texture_view(data, &data->materials[i].pbr_specular_glossiness.diffuse_texture); + cgltf_free_texture_view(data, &data->materials[i].pbr_specular_glossiness.specular_glossiness_texture); + } + if(data->materials[i].has_clearcoat) + { + cgltf_free_texture_view(data, &data->materials[i].clearcoat.clearcoat_texture); + 
cgltf_free_texture_view(data, &data->materials[i].clearcoat.clearcoat_roughness_texture); + cgltf_free_texture_view(data, &data->materials[i].clearcoat.clearcoat_normal_texture); + } + if(data->materials[i].has_specular) + { + cgltf_free_texture_view(data, &data->materials[i].specular.specular_texture); + cgltf_free_texture_view(data, &data->materials[i].specular.specular_color_texture); + } + if(data->materials[i].has_transmission) + { + cgltf_free_texture_view(data, &data->materials[i].transmission.transmission_texture); + } + if (data->materials[i].has_volume) + { + cgltf_free_texture_view(data, &data->materials[i].volume.thickness_texture); + } + if(data->materials[i].has_sheen) + { + cgltf_free_texture_view(data, &data->materials[i].sheen.sheen_color_texture); + cgltf_free_texture_view(data, &data->materials[i].sheen.sheen_roughness_texture); + } + if(data->materials[i].has_iridescence) + { + cgltf_free_texture_view(data, &data->materials[i].iridescence.iridescence_texture); + cgltf_free_texture_view(data, &data->materials[i].iridescence.iridescence_thickness_texture); + } + if (data->materials[i].has_anisotropy) + { + cgltf_free_texture_view(data, &data->materials[i].anisotropy.anisotropy_texture); + } + + cgltf_free_texture_view(data, &data->materials[i].normal_texture); + cgltf_free_texture_view(data, &data->materials[i].occlusion_texture); + cgltf_free_texture_view(data, &data->materials[i].emissive_texture); + + cgltf_free_extensions(data, data->materials[i].extensions, data->materials[i].extensions_count); + cgltf_free_extras(data, &data->materials[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->materials); + + for (cgltf_size i = 0; i < data->images_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->images[i].name); + data->memory.free_func(data->memory.user_data, data->images[i].uri); + data->memory.free_func(data->memory.user_data, data->images[i].mime_type); + + cgltf_free_extensions(data, 
data->images[i].extensions, data->images[i].extensions_count); + cgltf_free_extras(data, &data->images[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->images); + + for (cgltf_size i = 0; i < data->textures_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->textures[i].name); + + cgltf_free_extensions(data, data->textures[i].extensions, data->textures[i].extensions_count); + cgltf_free_extras(data, &data->textures[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->textures); + + for (cgltf_size i = 0; i < data->samplers_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->samplers[i].name); + + cgltf_free_extensions(data, data->samplers[i].extensions, data->samplers[i].extensions_count); + cgltf_free_extras(data, &data->samplers[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->samplers); + + for (cgltf_size i = 0; i < data->skins_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->skins[i].name); + data->memory.free_func(data->memory.user_data, data->skins[i].joints); + + cgltf_free_extensions(data, data->skins[i].extensions, data->skins[i].extensions_count); + cgltf_free_extras(data, &data->skins[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->skins); + + for (cgltf_size i = 0; i < data->cameras_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->cameras[i].name); + + if (data->cameras[i].type == cgltf_camera_type_perspective) + { + cgltf_free_extras(data, &data->cameras[i].data.perspective.extras); + } + else if (data->cameras[i].type == cgltf_camera_type_orthographic) + { + cgltf_free_extras(data, &data->cameras[i].data.orthographic.extras); + } + + cgltf_free_extensions(data, data->cameras[i].extensions, data->cameras[i].extensions_count); + cgltf_free_extras(data, &data->cameras[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->cameras); + + for (cgltf_size i = 0; i < 
data->lights_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->lights[i].name); + + cgltf_free_extras(data, &data->lights[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->lights); + + for (cgltf_size i = 0; i < data->nodes_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->nodes[i].name); + data->memory.free_func(data->memory.user_data, data->nodes[i].children); + data->memory.free_func(data->memory.user_data, data->nodes[i].weights); + + if (data->nodes[i].has_mesh_gpu_instancing) + { + for (cgltf_size j = 0; j < data->nodes[i].mesh_gpu_instancing.attributes_count; ++j) + { + data->memory.free_func(data->memory.user_data, data->nodes[i].mesh_gpu_instancing.attributes[j].name); + } + + data->memory.free_func(data->memory.user_data, data->nodes[i].mesh_gpu_instancing.attributes); + } + + cgltf_free_extensions(data, data->nodes[i].extensions, data->nodes[i].extensions_count); + cgltf_free_extras(data, &data->nodes[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->nodes); + + for (cgltf_size i = 0; i < data->scenes_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->scenes[i].name); + data->memory.free_func(data->memory.user_data, data->scenes[i].nodes); + + cgltf_free_extensions(data, data->scenes[i].extensions, data->scenes[i].extensions_count); + cgltf_free_extras(data, &data->scenes[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->scenes); + + for (cgltf_size i = 0; i < data->animations_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->animations[i].name); + for (cgltf_size j = 0; j < data->animations[i].samplers_count; ++j) + { + cgltf_free_extensions(data, data->animations[i].samplers[j].extensions, data->animations[i].samplers[j].extensions_count); + cgltf_free_extras(data, &data->animations[i].samplers[j].extras); + } + data->memory.free_func(data->memory.user_data, data->animations[i].samplers); + + for 
(cgltf_size j = 0; j < data->animations[i].channels_count; ++j) + { + cgltf_free_extensions(data, data->animations[i].channels[j].extensions, data->animations[i].channels[j].extensions_count); + cgltf_free_extras(data, &data->animations[i].channels[j].extras); + } + data->memory.free_func(data->memory.user_data, data->animations[i].channels); + + cgltf_free_extensions(data, data->animations[i].extensions, data->animations[i].extensions_count); + cgltf_free_extras(data, &data->animations[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->animations); + + for (cgltf_size i = 0; i < data->variants_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->variants[i].name); + + cgltf_free_extras(data, &data->variants[i].extras); + } + + data->memory.free_func(data->memory.user_data, data->variants); + + cgltf_free_extensions(data, data->data_extensions, data->data_extensions_count); + cgltf_free_extras(data, &data->extras); + + for (cgltf_size i = 0; i < data->extensions_used_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->extensions_used[i]); + } + + data->memory.free_func(data->memory.user_data, data->extensions_used); + + for (cgltf_size i = 0; i < data->extensions_required_count; ++i) + { + data->memory.free_func(data->memory.user_data, data->extensions_required[i]); + } + + data->memory.free_func(data->memory.user_data, data->extensions_required); + + file_release(&data->memory, &data->file, data->file_data); + + data->memory.free_func(data->memory.user_data, data); +} + +void cgltf_node_transform_local(const cgltf_node* node, cgltf_float* out_matrix) +{ + cgltf_float* lm = out_matrix; + + if (node->has_matrix) + { + memcpy(lm, node->matrix, sizeof(float) * 16); + } + else + { + float tx = node->translation[0]; + float ty = node->translation[1]; + float tz = node->translation[2]; + + float qx = node->rotation[0]; + float qy = node->rotation[1]; + float qz = node->rotation[2]; + float qw = node->rotation[3]; 
+ + float sx = node->scale[0]; + float sy = node->scale[1]; + float sz = node->scale[2]; + + lm[0] = (1 - 2 * qy*qy - 2 * qz*qz) * sx; + lm[1] = (2 * qx*qy + 2 * qz*qw) * sx; + lm[2] = (2 * qx*qz - 2 * qy*qw) * sx; + lm[3] = 0.f; + + lm[4] = (2 * qx*qy - 2 * qz*qw) * sy; + lm[5] = (1 - 2 * qx*qx - 2 * qz*qz) * sy; + lm[6] = (2 * qy*qz + 2 * qx*qw) * sy; + lm[7] = 0.f; + + lm[8] = (2 * qx*qz + 2 * qy*qw) * sz; + lm[9] = (2 * qy*qz - 2 * qx*qw) * sz; + lm[10] = (1 - 2 * qx*qx - 2 * qy*qy) * sz; + lm[11] = 0.f; + + lm[12] = tx; + lm[13] = ty; + lm[14] = tz; + lm[15] = 1.f; + } +} + +void cgltf_node_transform_world(const cgltf_node* node, cgltf_float* out_matrix) +{ + cgltf_float* lm = out_matrix; + cgltf_node_transform_local(node, lm); + + const cgltf_node* parent = node->parent; + + while (parent) + { + float pm[16]; + cgltf_node_transform_local(parent, pm); + + for (int i = 0; i < 4; ++i) + { + float l0 = lm[i * 4 + 0]; + float l1 = lm[i * 4 + 1]; + float l2 = lm[i * 4 + 2]; + + float r0 = l0 * pm[0] + l1 * pm[4] + l2 * pm[8]; + float r1 = l0 * pm[1] + l1 * pm[5] + l2 * pm[9]; + float r2 = l0 * pm[2] + l1 * pm[6] + l2 * pm[10]; + + lm[i * 4 + 0] = r0; + lm[i * 4 + 1] = r1; + lm[i * 4 + 2] = r2; + } + + lm[12] += pm[12]; + lm[13] += pm[13]; + lm[14] += pm[14]; + + parent = parent->parent; + } +} + +static cgltf_ssize cgltf_component_read_integer(const void* in, cgltf_component_type component_type) +{ + switch (component_type) + { + case cgltf_component_type_r_16: + return *((const int16_t*) in); + case cgltf_component_type_r_16u: + return *((const uint16_t*) in); + case cgltf_component_type_r_32u: + return *((const uint32_t*) in); + case cgltf_component_type_r_32f: + return (cgltf_ssize)*((const float*) in); + case cgltf_component_type_r_8: + return *((const int8_t*) in); + case cgltf_component_type_r_8u: + return *((const uint8_t*) in); + default: + return 0; + } +} + +static cgltf_size cgltf_component_read_index(const void* in, cgltf_component_type component_type) 
+{ + switch (component_type) + { + case cgltf_component_type_r_16u: + return *((const uint16_t*) in); + case cgltf_component_type_r_32u: + return *((const uint32_t*) in); + case cgltf_component_type_r_32f: + return (cgltf_size)((cgltf_ssize)*((const float*) in)); + case cgltf_component_type_r_8u: + return *((const uint8_t*) in); + default: + return 0; + } +} + +static cgltf_float cgltf_component_read_float(const void* in, cgltf_component_type component_type, cgltf_bool normalized) +{ + if (component_type == cgltf_component_type_r_32f) + { + return *((const float*) in); + } + + if (normalized) + { + switch (component_type) + { + // note: glTF spec doesn't currently define normalized conversions for 32-bit integers + case cgltf_component_type_r_16: + return *((const int16_t*) in) / (cgltf_float)32767; + case cgltf_component_type_r_16u: + return *((const uint16_t*) in) / (cgltf_float)65535; + case cgltf_component_type_r_8: + return *((const int8_t*) in) / (cgltf_float)127; + case cgltf_component_type_r_8u: + return *((const uint8_t*) in) / (cgltf_float)255; + default: + return 0; + } + } + + return (cgltf_float)cgltf_component_read_integer(in, component_type); +} + +static cgltf_bool cgltf_element_read_float(const uint8_t* element, cgltf_type type, cgltf_component_type component_type, cgltf_bool normalized, cgltf_float* out, cgltf_size element_size) +{ + cgltf_size num_components = cgltf_num_components(type); + + if (element_size < num_components) { + return 0; + } + + // There are three special cases for component extraction, see #data-alignment in the 2.0 spec. 
+ + cgltf_size component_size = cgltf_component_size(component_type); + + if (type == cgltf_type_mat2 && component_size == 1) + { + out[0] = cgltf_component_read_float(element, component_type, normalized); + out[1] = cgltf_component_read_float(element + 1, component_type, normalized); + out[2] = cgltf_component_read_float(element + 4, component_type, normalized); + out[3] = cgltf_component_read_float(element + 5, component_type, normalized); + return 1; + } + + if (type == cgltf_type_mat3 && component_size == 1) + { + out[0] = cgltf_component_read_float(element, component_type, normalized); + out[1] = cgltf_component_read_float(element + 1, component_type, normalized); + out[2] = cgltf_component_read_float(element + 2, component_type, normalized); + out[3] = cgltf_component_read_float(element + 4, component_type, normalized); + out[4] = cgltf_component_read_float(element + 5, component_type, normalized); + out[5] = cgltf_component_read_float(element + 6, component_type, normalized); + out[6] = cgltf_component_read_float(element + 8, component_type, normalized); + out[7] = cgltf_component_read_float(element + 9, component_type, normalized); + out[8] = cgltf_component_read_float(element + 10, component_type, normalized); + return 1; + } + + if (type == cgltf_type_mat3 && component_size == 2) + { + out[0] = cgltf_component_read_float(element, component_type, normalized); + out[1] = cgltf_component_read_float(element + 2, component_type, normalized); + out[2] = cgltf_component_read_float(element + 4, component_type, normalized); + out[3] = cgltf_component_read_float(element + 8, component_type, normalized); + out[4] = cgltf_component_read_float(element + 10, component_type, normalized); + out[5] = cgltf_component_read_float(element + 12, component_type, normalized); + out[6] = cgltf_component_read_float(element + 16, component_type, normalized); + out[7] = cgltf_component_read_float(element + 18, component_type, normalized); + out[8] = 
cgltf_component_read_float(element + 20, component_type, normalized); + return 1; + } + + for (cgltf_size i = 0; i < num_components; ++i) + { + out[i] = cgltf_component_read_float(element + component_size * i, component_type, normalized); + } + return 1; +} + +const uint8_t* cgltf_buffer_view_data(const cgltf_buffer_view* view) +{ + if (view->data) + return (const uint8_t*)view->data; + + if (!view->buffer->data) + return NULL; + + const uint8_t* result = (const uint8_t*)view->buffer->data; + result += view->offset; + return result; +} + +cgltf_bool cgltf_accessor_read_float(const cgltf_accessor* accessor, cgltf_size index, cgltf_float* out, cgltf_size element_size) +{ + if (accessor->is_sparse) + { + return 0; + } + if (accessor->buffer_view == NULL) + { + memset(out, 0, element_size * sizeof(cgltf_float)); + return 1; + } + const uint8_t* element = cgltf_buffer_view_data(accessor->buffer_view); + if (element == NULL) + { + return 0; + } + element += accessor->offset + accessor->stride * index; + return cgltf_element_read_float(element, accessor->type, accessor->component_type, accessor->normalized, out, element_size); +} + +cgltf_size cgltf_accessor_unpack_floats(const cgltf_accessor* accessor, cgltf_float* out, cgltf_size float_count) +{ + cgltf_size floats_per_element = cgltf_num_components(accessor->type); + cgltf_size available_floats = accessor->count * floats_per_element; + if (out == NULL) + { + return available_floats; + } + + float_count = available_floats < float_count ? available_floats : float_count; + cgltf_size element_count = float_count / floats_per_element; + + // First pass: convert each element in the base accessor. 
+ if (accessor->buffer_view == NULL) + { + memset(out, 0, element_count * floats_per_element * sizeof(cgltf_float)); + } + else + { + const uint8_t* element = cgltf_buffer_view_data(accessor->buffer_view); + if (element == NULL) + { + return 0; + } + element += accessor->offset; + + if (accessor->component_type == cgltf_component_type_r_32f && accessor->stride == floats_per_element * sizeof(cgltf_float)) + { + memcpy(out, element, element_count * floats_per_element * sizeof(cgltf_float)); + } + else + { + cgltf_float* dest = out; + + for (cgltf_size index = 0; index < element_count; index++, dest += floats_per_element, element += accessor->stride) + { + if (!cgltf_element_read_float(element, accessor->type, accessor->component_type, accessor->normalized, dest, floats_per_element)) + { + return 0; + } + } + } + } + + // Second pass: write out each element in the sparse accessor. + if (accessor->is_sparse) + { + const cgltf_accessor_sparse* sparse = &accessor->sparse; + + const uint8_t* index_data = cgltf_buffer_view_data(sparse->indices_buffer_view); + const uint8_t* reader_head = cgltf_buffer_view_data(sparse->values_buffer_view); + + if (index_data == NULL || reader_head == NULL) + { + return 0; + } + + index_data += sparse->indices_byte_offset; + reader_head += sparse->values_byte_offset; + + cgltf_size index_stride = cgltf_component_size(sparse->indices_component_type); + for (cgltf_size reader_index = 0; reader_index < sparse->count; reader_index++, index_data += index_stride, reader_head += accessor->stride) + { + size_t writer_index = cgltf_component_read_index(index_data, sparse->indices_component_type); + float* writer_head = out + writer_index * floats_per_element; + + if (!cgltf_element_read_float(reader_head, accessor->type, accessor->component_type, accessor->normalized, writer_head, floats_per_element)) + { + return 0; + } + } + } + + return element_count * floats_per_element; +} + +static cgltf_uint cgltf_component_read_uint(const void* in, 
cgltf_component_type component_type) +{ + switch (component_type) + { + case cgltf_component_type_r_8: + return *((const int8_t*) in); + + case cgltf_component_type_r_8u: + return *((const uint8_t*) in); + + case cgltf_component_type_r_16: + return *((const int16_t*) in); + + case cgltf_component_type_r_16u: + return *((const uint16_t*) in); + + case cgltf_component_type_r_32u: + return *((const uint32_t*) in); + + default: + return 0; + } +} + +static cgltf_bool cgltf_element_read_uint(const uint8_t* element, cgltf_type type, cgltf_component_type component_type, cgltf_uint* out, cgltf_size element_size) +{ + cgltf_size num_components = cgltf_num_components(type); + + if (element_size < num_components) + { + return 0; + } + + // Reading integer matrices is not a valid use case + if (type == cgltf_type_mat2 || type == cgltf_type_mat3 || type == cgltf_type_mat4) + { + return 0; + } + + cgltf_size component_size = cgltf_component_size(component_type); + + for (cgltf_size i = 0; i < num_components; ++i) + { + out[i] = cgltf_component_read_uint(element + component_size * i, component_type); + } + return 1; +} + +cgltf_bool cgltf_accessor_read_uint(const cgltf_accessor* accessor, cgltf_size index, cgltf_uint* out, cgltf_size element_size) +{ + if (accessor->is_sparse) + { + return 0; + } + if (accessor->buffer_view == NULL) + { + memset(out, 0, element_size * sizeof( cgltf_uint )); + return 1; + } + const uint8_t* element = cgltf_buffer_view_data(accessor->buffer_view); + if (element == NULL) + { + return 0; + } + element += accessor->offset + accessor->stride * index; + return cgltf_element_read_uint(element, accessor->type, accessor->component_type, out, element_size); +} + +cgltf_size cgltf_accessor_read_index(const cgltf_accessor* accessor, cgltf_size index) +{ + if (accessor->is_sparse) + { + return 0; // This is an error case, but we can't communicate the error with existing interface. 
+ } + if (accessor->buffer_view == NULL) + { + return 0; + } + const uint8_t* element = cgltf_buffer_view_data(accessor->buffer_view); + if (element == NULL) + { + return 0; // This is an error case, but we can't communicate the error with existing interface. + } + element += accessor->offset + accessor->stride * index; + return cgltf_component_read_index(element, accessor->component_type); +} + +cgltf_size cgltf_mesh_index(const cgltf_data* data, const cgltf_mesh* object) +{ + assert(object && (cgltf_size)(object - data->meshes) < data->meshes_count); + return (cgltf_size)(object - data->meshes); +} + +cgltf_size cgltf_material_index(const cgltf_data* data, const cgltf_material* object) +{ + assert(object && (cgltf_size)(object - data->materials) < data->materials_count); + return (cgltf_size)(object - data->materials); +} + +cgltf_size cgltf_accessor_index(const cgltf_data* data, const cgltf_accessor* object) +{ + assert(object && (cgltf_size)(object - data->accessors) < data->accessors_count); + return (cgltf_size)(object - data->accessors); +} + +cgltf_size cgltf_buffer_view_index(const cgltf_data* data, const cgltf_buffer_view* object) +{ + assert(object && (cgltf_size)(object - data->buffer_views) < data->buffer_views_count); + return (cgltf_size)(object - data->buffer_views); +} + +cgltf_size cgltf_buffer_index(const cgltf_data* data, const cgltf_buffer* object) +{ + assert(object && (cgltf_size)(object - data->buffers) < data->buffers_count); + return (cgltf_size)(object - data->buffers); +} + +cgltf_size cgltf_image_index(const cgltf_data* data, const cgltf_image* object) +{ + assert(object && (cgltf_size)(object - data->images) < data->images_count); + return (cgltf_size)(object - data->images); +} + +cgltf_size cgltf_texture_index(const cgltf_data* data, const cgltf_texture* object) +{ + assert(object && (cgltf_size)(object - data->textures) < data->textures_count); + return (cgltf_size)(object - data->textures); +} + +cgltf_size 
cgltf_sampler_index(const cgltf_data* data, const cgltf_sampler* object) +{ + assert(object && (cgltf_size)(object - data->samplers) < data->samplers_count); + return (cgltf_size)(object - data->samplers); +} + +cgltf_size cgltf_skin_index(const cgltf_data* data, const cgltf_skin* object) +{ + assert(object && (cgltf_size)(object - data->skins) < data->skins_count); + return (cgltf_size)(object - data->skins); +} + +cgltf_size cgltf_camera_index(const cgltf_data* data, const cgltf_camera* object) +{ + assert(object && (cgltf_size)(object - data->cameras) < data->cameras_count); + return (cgltf_size)(object - data->cameras); +} + +cgltf_size cgltf_light_index(const cgltf_data* data, const cgltf_light* object) +{ + assert(object && (cgltf_size)(object - data->lights) < data->lights_count); + return (cgltf_size)(object - data->lights); +} + +cgltf_size cgltf_node_index(const cgltf_data* data, const cgltf_node* object) +{ + assert(object && (cgltf_size)(object - data->nodes) < data->nodes_count); + return (cgltf_size)(object - data->nodes); +} + +cgltf_size cgltf_scene_index(const cgltf_data* data, const cgltf_scene* object) +{ + assert(object && (cgltf_size)(object - data->scenes) < data->scenes_count); + return (cgltf_size)(object - data->scenes); +} + +cgltf_size cgltf_animation_index(const cgltf_data* data, const cgltf_animation* object) +{ + assert(object && (cgltf_size)(object - data->animations) < data->animations_count); + return (cgltf_size)(object - data->animations); +} + +cgltf_size cgltf_animation_sampler_index(const cgltf_animation* animation, const cgltf_animation_sampler* object) +{ + assert(object && (cgltf_size)(object - animation->samplers) < animation->samplers_count); + return (cgltf_size)(object - animation->samplers); +} + +cgltf_size cgltf_animation_channel_index(const cgltf_animation* animation, const cgltf_animation_channel* object) +{ + assert(object && (cgltf_size)(object - animation->channels) < animation->channels_count); + return 
(cgltf_size)(object - animation->channels); +} + +cgltf_size cgltf_accessor_unpack_indices(const cgltf_accessor* accessor, cgltf_uint* out, cgltf_size index_count) +{ + if (out == NULL) + { + return accessor->count; + } + + index_count = accessor->count < index_count ? accessor->count : index_count; + + if (accessor->is_sparse) + { + return 0; + } + if (accessor->buffer_view == NULL) + { + return 0; + } + const uint8_t* element = cgltf_buffer_view_data(accessor->buffer_view); + if (element == NULL) + { + return 0; + } + element += accessor->offset; + + if (accessor->component_type == cgltf_component_type_r_32u && accessor->stride == sizeof(cgltf_uint)) + { + memcpy(out, element, index_count * sizeof(cgltf_uint)); + } + else + { + cgltf_uint* dest = out; + + for (cgltf_size index = 0; index < index_count; index++, dest++, element += accessor->stride) + { + *dest = (cgltf_uint)cgltf_component_read_index(element, accessor->component_type); + } + } + + return index_count; +} + +#define CGLTF_ERROR_JSON -1 +#define CGLTF_ERROR_NOMEM -2 +#define CGLTF_ERROR_LEGACY -3 + +#define CGLTF_CHECK_TOKTYPE(tok_, type_) if ((tok_).type != (type_)) { return CGLTF_ERROR_JSON; } +#define CGLTF_CHECK_TOKTYPE_RETTYPE(tok_, type_, ret_) if ((tok_).type != (type_)) { return (ret_)CGLTF_ERROR_JSON; } +#define CGLTF_CHECK_KEY(tok_) if ((tok_).type != JSMN_STRING || (tok_).size == 0) { return CGLTF_ERROR_JSON; } /* checking size for 0 verifies that a value follows the key */ + +#define CGLTF_PTRINDEX(type, idx) (type*)((cgltf_size)idx + 1) +#define CGLTF_PTRFIXUP(var, data, size) if (var) { if ((cgltf_size)var > size) { return CGLTF_ERROR_JSON; } var = &data[(cgltf_size)var-1]; } +#define CGLTF_PTRFIXUP_REQ(var, data, size) if (!var || (cgltf_size)var > size) { return CGLTF_ERROR_JSON; } var = &data[(cgltf_size)var-1]; + +static int cgltf_json_strcmp(jsmntok_t const* tok, const uint8_t* json_chunk, const char* str) +{ + CGLTF_CHECK_TOKTYPE(*tok, JSMN_STRING); + size_t const str_len = 
strlen(str); + size_t const name_length = (size_t)(tok->end - tok->start); + return (str_len == name_length) ? strncmp((const char*)json_chunk + tok->start, str, str_len) : 128; +} + +static int cgltf_json_to_int(jsmntok_t const* tok, const uint8_t* json_chunk) +{ + CGLTF_CHECK_TOKTYPE(*tok, JSMN_PRIMITIVE); + char tmp[128]; + int size = (size_t)(tok->end - tok->start) < sizeof(tmp) ? (int)(tok->end - tok->start) : (int)(sizeof(tmp) - 1); + strncpy(tmp, (const char*)json_chunk + tok->start, size); + tmp[size] = 0; + return CGLTF_ATOI(tmp); +} + +static cgltf_size cgltf_json_to_size(jsmntok_t const* tok, const uint8_t* json_chunk) +{ + CGLTF_CHECK_TOKTYPE_RETTYPE(*tok, JSMN_PRIMITIVE, cgltf_size); + char tmp[128]; + int size = (size_t)(tok->end - tok->start) < sizeof(tmp) ? (int)(tok->end - tok->start) : (int)(sizeof(tmp) - 1); + strncpy(tmp, (const char*)json_chunk + tok->start, size); + tmp[size] = 0; + return (cgltf_size)CGLTF_ATOLL(tmp); +} + +static cgltf_float cgltf_json_to_float(jsmntok_t const* tok, const uint8_t* json_chunk) +{ + CGLTF_CHECK_TOKTYPE(*tok, JSMN_PRIMITIVE); + char tmp[128]; + int size = (size_t)(tok->end - tok->start) < sizeof(tmp) ? 
(int)(tok->end - tok->start) : (int)(sizeof(tmp) - 1); + strncpy(tmp, (const char*)json_chunk + tok->start, size); + tmp[size] = 0; + return (cgltf_float)CGLTF_ATOF(tmp); +} + +static cgltf_bool cgltf_json_to_bool(jsmntok_t const* tok, const uint8_t* json_chunk) +{ + int size = (int)(tok->end - tok->start); + return size == 4 && memcmp(json_chunk + tok->start, "true", 4) == 0; +} + +static int cgltf_skip_json(jsmntok_t const* tokens, int i) +{ + int end = i + 1; + + while (i < end) + { + switch (tokens[i].type) + { + case JSMN_OBJECT: + end += tokens[i].size * 2; + break; + + case JSMN_ARRAY: + end += tokens[i].size; + break; + + case JSMN_PRIMITIVE: + case JSMN_STRING: + break; + + default: + return -1; + } + + i++; + } + + return i; +} + +static void cgltf_fill_float_array(float* out_array, int size, float value) +{ + for (int j = 0; j < size; ++j) + { + out_array[j] = value; + } +} + +static int cgltf_parse_json_float_array(jsmntok_t const* tokens, int i, const uint8_t* json_chunk, float* out_array, int size) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_ARRAY); + if (tokens[i].size != size) + { + return CGLTF_ERROR_JSON; + } + ++i; + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_array[j] = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + return i; +} + +static int cgltf_parse_json_string(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, char** out_string) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_STRING); + if (*out_string) + { + return CGLTF_ERROR_JSON; + } + int size = (int)(tokens[i].end - tokens[i].start); + char* result = (char*)options->memory.alloc_func(options->memory.user_data, size + 1); + if (!result) + { + return CGLTF_ERROR_NOMEM; + } + strncpy(result, (const char*)json_chunk + tokens[i].start, size); + result[size] = 0; + *out_string = result; + return i + 1; +} + +static int cgltf_parse_json_array(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* 
json_chunk, size_t element_size, void** out_array, cgltf_size* out_size) +{ + (void)json_chunk; + if (tokens[i].type != JSMN_ARRAY) + { + return tokens[i].type == JSMN_OBJECT ? CGLTF_ERROR_LEGACY : CGLTF_ERROR_JSON; + } + if (*out_array) + { + return CGLTF_ERROR_JSON; + } + int size = tokens[i].size; + void* result = cgltf_calloc(options, element_size, size); + if (!result) + { + return CGLTF_ERROR_NOMEM; + } + *out_array = result; + *out_size = size; + return i + 1; +} + +static int cgltf_parse_json_string_array(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, char*** out_array, cgltf_size* out_size) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_ARRAY); + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(char*), (void**)out_array, out_size); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < *out_size; ++j) + { + i = cgltf_parse_json_string(options, tokens, i, json_chunk, j + (*out_array)); + if (i < 0) + { + return i; + } + } + return i; +} + +static void cgltf_parse_attribute_type(const char* name, cgltf_attribute_type* out_type, int* out_index) +{ + if (*name == '_') + { + *out_type = cgltf_attribute_type_custom; + return; + } + + const char* us = strchr(name, '_'); + size_t len = us ? 
(size_t)(us - name) : strlen(name); + + if (len == 8 && strncmp(name, "POSITION", 8) == 0) + { + *out_type = cgltf_attribute_type_position; + } + else if (len == 6 && strncmp(name, "NORMAL", 6) == 0) + { + *out_type = cgltf_attribute_type_normal; + } + else if (len == 7 && strncmp(name, "TANGENT", 7) == 0) + { + *out_type = cgltf_attribute_type_tangent; + } + else if (len == 8 && strncmp(name, "TEXCOORD", 8) == 0) + { + *out_type = cgltf_attribute_type_texcoord; + } + else if (len == 5 && strncmp(name, "COLOR", 5) == 0) + { + *out_type = cgltf_attribute_type_color; + } + else if (len == 6 && strncmp(name, "JOINTS", 6) == 0) + { + *out_type = cgltf_attribute_type_joints; + } + else if (len == 7 && strncmp(name, "WEIGHTS", 7) == 0) + { + *out_type = cgltf_attribute_type_weights; + } + else + { + *out_type = cgltf_attribute_type_invalid; + } + + if (us && *out_type != cgltf_attribute_type_invalid) + { + *out_index = CGLTF_ATOI(us + 1); + } +} + +static int cgltf_parse_json_attribute_list(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_attribute** out_attributes, cgltf_size* out_attributes_count) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + if (*out_attributes) + { + return CGLTF_ERROR_JSON; + } + + *out_attributes_count = tokens[i].size; + *out_attributes = (cgltf_attribute*)cgltf_calloc(options, sizeof(cgltf_attribute), *out_attributes_count); + ++i; + + if (!*out_attributes) + { + return CGLTF_ERROR_NOMEM; + } + + for (cgltf_size j = 0; j < *out_attributes_count; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + i = cgltf_parse_json_string(options, tokens, i, json_chunk, &(*out_attributes)[j].name); + if (i < 0) + { + return CGLTF_ERROR_JSON; + } + + cgltf_parse_attribute_type((*out_attributes)[j].name, &(*out_attributes)[j].type, &(*out_attributes)[j].index); + + (*out_attributes)[j].data = CGLTF_PTRINDEX(cgltf_accessor, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + + return i; +} + +static int 
cgltf_parse_json_extras(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_extras* out_extras) +{ + if (out_extras->data) + { + return CGLTF_ERROR_JSON; + } + + /* fill deprecated fields for now, this will be removed in the future */ + out_extras->start_offset = tokens[i].start; + out_extras->end_offset = tokens[i].end; + + size_t start = tokens[i].start; + size_t size = tokens[i].end - start; + out_extras->data = (char*)options->memory.alloc_func(options->memory.user_data, size + 1); + if (!out_extras->data) + { + return CGLTF_ERROR_NOMEM; + } + strncpy(out_extras->data, (const char*)json_chunk + start, size); + out_extras->data[size] = '\0'; + + i = cgltf_skip_json(tokens, i); + return i; +} + +static int cgltf_parse_json_unprocessed_extension(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_extension* out_extension) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_STRING); + CGLTF_CHECK_TOKTYPE(tokens[i+1], JSMN_OBJECT); + if (out_extension->name) + { + return CGLTF_ERROR_JSON; + } + + cgltf_size name_length = tokens[i].end - tokens[i].start; + out_extension->name = (char*)options->memory.alloc_func(options->memory.user_data, name_length + 1); + if (!out_extension->name) + { + return CGLTF_ERROR_NOMEM; + } + strncpy(out_extension->name, (const char*)json_chunk + tokens[i].start, name_length); + out_extension->name[name_length] = 0; + i++; + + size_t start = tokens[i].start; + size_t size = tokens[i].end - start; + out_extension->data = (char*)options->memory.alloc_func(options->memory.user_data, size + 1); + if (!out_extension->data) + { + return CGLTF_ERROR_NOMEM; + } + strncpy(out_extension->data, (const char*)json_chunk + start, size); + out_extension->data[size] = '\0'; + + i = cgltf_skip_json(tokens, i); + + return i; +} + +static int cgltf_parse_json_unprocessed_extensions(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_size* out_extensions_count, 
cgltf_extension** out_extensions) +{ + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(*out_extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + *out_extensions_count = 0; + *out_extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!*out_extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + + for (int j = 0; j < extensions_size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + cgltf_size extension_index = (*out_extensions_count)++; + cgltf_extension* extension = &((*out_extensions)[extension_index]); + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, extension); + + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_draco_mesh_compression(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_draco_mesh_compression* out_draco_mesh_compression) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "attributes") == 0) + { + i = cgltf_parse_json_attribute_list(options, tokens, i + 1, json_chunk, &out_draco_mesh_compression->attributes, &out_draco_mesh_compression->attributes_count); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "bufferView") == 0) + { + ++i; + out_draco_mesh_compression->buffer_view = CGLTF_PTRINDEX(cgltf_buffer_view, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_mesh_gpu_instancing(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_mesh_gpu_instancing* out_mesh_gpu_instancing) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + 
CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "attributes") == 0) + { + i = cgltf_parse_json_attribute_list(options, tokens, i + 1, json_chunk, &out_mesh_gpu_instancing->attributes, &out_mesh_gpu_instancing->attributes_count); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_material_mapping_data(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_material_mapping* out_mappings, cgltf_size* offset) +{ + (void)options; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_ARRAY); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int obj_size = tokens[i].size; + ++i; + + int material = -1; + int variants_tok = -1; + int extras_tok = -1; + + for (int k = 0; k < obj_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "material") == 0) + { + ++i; + material = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "variants") == 0) + { + variants_tok = i+1; + CGLTF_CHECK_TOKTYPE(tokens[variants_tok], JSMN_ARRAY); + + i = cgltf_skip_json(tokens, i+1); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + extras_tok = i + 1; + i = cgltf_skip_json(tokens, extras_tok); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + if (material < 0 || variants_tok < 0) + { + return CGLTF_ERROR_JSON; + } + + if (out_mappings) + { + for (int k = 0; k < tokens[variants_tok].size; ++k) + { + int variant = cgltf_json_to_int(&tokens[variants_tok + 1 + k], json_chunk); + if (variant < 0) + return variant; + + out_mappings[*offset].material = CGLTF_PTRINDEX(cgltf_material, material); + out_mappings[*offset].variant = variant; + + if (extras_tok >= 0) + { + int e = cgltf_parse_json_extras(options, tokens, 
extras_tok, json_chunk, &out_mappings[*offset].extras); + if (e < 0) + return e; + } + + (*offset)++; + } + } + else + { + (*offset) += tokens[variants_tok].size; + } + } + + return i; +} + +static int cgltf_parse_json_material_mappings(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_primitive* out_prim) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "mappings") == 0) + { + if (out_prim->mappings) + { + return CGLTF_ERROR_JSON; + } + + cgltf_size mappings_offset = 0; + int k = cgltf_parse_json_material_mapping_data(options, tokens, i + 1, json_chunk, NULL, &mappings_offset); + if (k < 0) + { + return k; + } + + out_prim->mappings_count = mappings_offset; + out_prim->mappings = (cgltf_material_mapping*)cgltf_calloc(options, sizeof(cgltf_material_mapping), out_prim->mappings_count); + + mappings_offset = 0; + i = cgltf_parse_json_material_mapping_data(options, tokens, i + 1, json_chunk, out_prim->mappings, &mappings_offset); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_primitive(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_primitive* out_prim) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + out_prim->type = cgltf_primitive_type_triangles; + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "mode") == 0) + { + ++i; + out_prim->type + = (cgltf_primitive_type) + cgltf_json_to_int(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "indices") == 0) + { + ++i; + out_prim->indices = CGLTF_PTRINDEX(cgltf_accessor, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if 
(cgltf_json_strcmp(tokens+i, json_chunk, "material") == 0) + { + ++i; + out_prim->material = CGLTF_PTRINDEX(cgltf_material, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "attributes") == 0) + { + i = cgltf_parse_json_attribute_list(options, tokens, i + 1, json_chunk, &out_prim->attributes, &out_prim->attributes_count); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "targets") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_morph_target), (void**)&out_prim->targets, &out_prim->targets_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_prim->targets_count; ++k) + { + i = cgltf_parse_json_attribute_list(options, tokens, i, json_chunk, &out_prim->targets[k].attributes, &out_prim->targets[k].attributes_count); + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_prim->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_prim->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + out_prim->extensions_count = 0; + out_prim->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!out_prim->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_draco_mesh_compression") == 0) + { + out_prim->has_draco_mesh_compression = 1; + i = cgltf_parse_json_draco_mesh_compression(options, tokens, i + 1, json_chunk, &out_prim->draco_mesh_compression); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_variants") == 0) + { + i = cgltf_parse_json_material_mappings(options, tokens, i + 1, 
json_chunk, out_prim); + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_prim->extensions[out_prim->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_mesh(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_mesh* out_mesh) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_mesh->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "primitives") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_primitive), (void**)&out_mesh->primitives, &out_mesh->primitives_count); + if (i < 0) + { + return i; + } + + for (cgltf_size prim_index = 0; prim_index < out_mesh->primitives_count; ++prim_index) + { + i = cgltf_parse_json_primitive(options, tokens, i, json_chunk, &out_mesh->primitives[prim_index]); + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "weights") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_float), (void**)&out_mesh->weights, &out_mesh->weights_count); + if (i < 0) + { + return i; + } + + i = cgltf_parse_json_float_array(tokens, i - 1, json_chunk, out_mesh->weights, (int)out_mesh->weights_count); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + ++i; + + out_mesh->extras.start_offset = tokens[i].start; + out_mesh->extras.end_offset = tokens[i].end; + + if (tokens[i].type == JSMN_OBJECT) + { + int extras_size = tokens[i].size; + ++i; + + for (int k = 0; k < extras_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if 
(cgltf_json_strcmp(tokens+i, json_chunk, "targetNames") == 0 && tokens[i+1].type == JSMN_ARRAY) + { + i = cgltf_parse_json_string_array(options, tokens, i + 1, json_chunk, &out_mesh->target_names, &out_mesh->target_names_count); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i); + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_mesh->extensions_count, &out_mesh->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_meshes(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_mesh), (void**)&out_data->meshes, &out_data->meshes_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->meshes_count; ++j) + { + i = cgltf_parse_json_mesh(options, tokens, i, json_chunk, &out_data->meshes[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static cgltf_component_type cgltf_json_to_component_type(jsmntok_t const* tok, const uint8_t* json_chunk) +{ + int type = cgltf_json_to_int(tok, json_chunk); + + switch (type) + { + case 5120: + return cgltf_component_type_r_8; + case 5121: + return cgltf_component_type_r_8u; + case 5122: + return cgltf_component_type_r_16; + case 5123: + return cgltf_component_type_r_16u; + case 5125: + return cgltf_component_type_r_32u; + case 5126: + return cgltf_component_type_r_32f; + default: + return cgltf_component_type_invalid; + } +} + +static int cgltf_parse_json_accessor_sparse(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_accessor_sparse* out_sparse) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; 
+ + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "count") == 0) + { + ++i; + out_sparse->count = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "indices") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int indices_size = tokens[i].size; + ++i; + + for (int k = 0; k < indices_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "bufferView") == 0) + { + ++i; + out_sparse->indices_buffer_view = CGLTF_PTRINDEX(cgltf_buffer_view, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0) + { + ++i; + out_sparse->indices_byte_offset = cgltf_json_to_size(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0) + { + ++i; + out_sparse->indices_component_type = cgltf_json_to_component_type(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_sparse->indices_extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_sparse->indices_extensions_count, &out_sparse->indices_extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "values") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int values_size = tokens[i].size; + ++i; + + for (int k = 0; k < values_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "bufferView") == 0) + { + ++i; + out_sparse->values_buffer_view = CGLTF_PTRINDEX(cgltf_buffer_view, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if 
(cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0) + { + ++i; + out_sparse->values_byte_offset = cgltf_json_to_size(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_sparse->values_extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_sparse->values_extensions_count, &out_sparse->values_extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_sparse->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_sparse->extensions_count, &out_sparse->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_accessor(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_accessor* out_accessor) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_accessor->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "bufferView") == 0) + { + ++i; + out_accessor->buffer_view = CGLTF_PTRINDEX(cgltf_buffer_view, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0) + { + ++i; + out_accessor->offset = + cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if 
(cgltf_json_strcmp(tokens+i, json_chunk, "componentType") == 0) + { + ++i; + out_accessor->component_type = cgltf_json_to_component_type(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "normalized") == 0) + { + ++i; + out_accessor->normalized = cgltf_json_to_bool(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "count") == 0) + { + ++i; + out_accessor->count = + cgltf_json_to_int(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "type") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens+i, json_chunk, "SCALAR") == 0) + { + out_accessor->type = cgltf_type_scalar; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "VEC2") == 0) + { + out_accessor->type = cgltf_type_vec2; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "VEC3") == 0) + { + out_accessor->type = cgltf_type_vec3; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "VEC4") == 0) + { + out_accessor->type = cgltf_type_vec4; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "MAT2") == 0) + { + out_accessor->type = cgltf_type_mat2; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "MAT3") == 0) + { + out_accessor->type = cgltf_type_mat3; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "MAT4") == 0) + { + out_accessor->type = cgltf_type_mat4; + } + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "min") == 0) + { + ++i; + out_accessor->has_min = 1; + // note: we can't parse the precise number of elements since type may not have been computed yet + int min_size = tokens[i].size > 16 ? 16 : tokens[i].size; + i = cgltf_parse_json_float_array(tokens, i, json_chunk, out_accessor->min, min_size); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "max") == 0) + { + ++i; + out_accessor->has_max = 1; + // note: we can't parse the precise number of elements since type may not have been computed yet + int max_size = tokens[i].size > 16 ? 
16 : tokens[i].size; + i = cgltf_parse_json_float_array(tokens, i, json_chunk, out_accessor->max, max_size); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "sparse") == 0) + { + out_accessor->is_sparse = 1; + i = cgltf_parse_json_accessor_sparse(options, tokens, i + 1, json_chunk, &out_accessor->sparse); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_accessor->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_accessor->extensions_count, &out_accessor->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_texture_transform(jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_texture_transform* out_texture_transform) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "offset") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_texture_transform->offset, 2); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "rotation") == 0) + { + ++i; + out_texture_transform->rotation = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "scale") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_texture_transform->scale, 2); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "texCoord") == 0) + { + ++i; + out_texture_transform->has_texcoord = 1; + out_texture_transform->texcoord = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int 
cgltf_parse_json_texture_view(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_texture_view* out_texture_view) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + out_texture_view->scale = 1.0f; + cgltf_fill_float_array(out_texture_view->transform.scale, 2, 1.0f); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "index") == 0) + { + ++i; + out_texture_view->texture = CGLTF_PTRINDEX(cgltf_texture, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "texCoord") == 0) + { + ++i; + out_texture_view->texcoord = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "scale") == 0) + { + ++i; + out_texture_view->scale = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "strength") == 0) + { + ++i; + out_texture_view->scale = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_texture_view->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_texture_view->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + out_texture_view->extensions_count = 0; + out_texture_view->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!out_texture_view->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_texture_transform") == 0) + { + out_texture_view->has_transform = 1; + i = 
cgltf_parse_json_texture_transform(tokens, i + 1, json_chunk, &out_texture_view->transform); + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_texture_view->extensions[out_texture_view->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_pbr_metallic_roughness(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_pbr_metallic_roughness* out_pbr) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "metallicFactor") == 0) + { + ++i; + out_pbr->metallic_factor = + cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "roughnessFactor") == 0) + { + ++i; + out_pbr->roughness_factor = + cgltf_json_to_float(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "baseColorFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_pbr->base_color_factor, 4); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "baseColorTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, + &out_pbr->base_color_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "metallicRoughnessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, + &out_pbr->metallic_roughness_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_pbr_specular_glossiness(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_pbr_specular_glossiness* out_pbr) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); 
+ int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "diffuseFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_pbr->diffuse_factor, 4); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "specularFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_pbr->specular_factor, 3); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "glossinessFactor") == 0) + { + ++i; + out_pbr->glossiness_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "diffuseTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_pbr->diffuse_texture); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "specularGlossinessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_pbr->specular_glossiness_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_clearcoat(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_clearcoat* out_clearcoat) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "clearcoatFactor") == 0) + { + ++i; + out_clearcoat->clearcoat_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "clearcoatRoughnessFactor") == 0) + { + ++i; + out_clearcoat->clearcoat_roughness_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "clearcoatTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_clearcoat->clearcoat_texture); + } 
+ else if (cgltf_json_strcmp(tokens+i, json_chunk, "clearcoatRoughnessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_clearcoat->clearcoat_roughness_texture); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "clearcoatNormalTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_clearcoat->clearcoat_normal_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_ior(jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_ior* out_ior) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + // Default values + out_ior->ior = 1.5f; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "ior") == 0) + { + ++i; + out_ior->ior = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_specular(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_specular* out_specular) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + // Default values + out_specular->specular_factor = 1.0f; + cgltf_fill_float_array(out_specular->specular_color_factor, 3, 1.0f); + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "specularFactor") == 0) + { + ++i; + out_specular->specular_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "specularColorFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_specular->specular_color_factor, 3); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "specularTexture") == 0) + { + i = 
cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_specular->specular_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "specularColorTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_specular->specular_color_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_transmission(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_transmission* out_transmission) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "transmissionFactor") == 0) + { + ++i; + out_transmission->transmission_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "transmissionTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_transmission->transmission_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_volume(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_volume* out_volume) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "thicknessFactor") == 0) + { + ++i; + out_volume->thickness_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "thicknessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_volume->thickness_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "attenuationColor") == 0) + { + i = 
cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_volume->attenuation_color, 3); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "attenuationDistance") == 0) + { + ++i; + out_volume->attenuation_distance = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_sheen(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_sheen* out_sheen) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "sheenColorFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_sheen->sheen_color_factor, 3); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "sheenColorTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_sheen->sheen_color_texture); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "sheenRoughnessFactor") == 0) + { + ++i; + out_sheen->sheen_roughness_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "sheenRoughnessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_sheen->sheen_roughness_texture); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_emissive_strength(jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_emissive_strength* out_emissive_strength) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + // Default + out_emissive_strength->emissive_strength = 1.f; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "emissiveStrength") 
== 0) + { + ++i; + out_emissive_strength->emissive_strength = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_iridescence(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_iridescence* out_iridescence) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + // Default + out_iridescence->iridescence_ior = 1.3f; + out_iridescence->iridescence_thickness_min = 100.f; + out_iridescence->iridescence_thickness_max = 400.f; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceFactor") == 0) + { + ++i; + out_iridescence->iridescence_factor = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_iridescence->iridescence_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceIor") == 0) + { + ++i; + out_iridescence->iridescence_ior = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceThicknessMinimum") == 0) + { + ++i; + out_iridescence->iridescence_thickness_min = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceThicknessMaximum") == 0) + { + ++i; + out_iridescence->iridescence_thickness_max = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "iridescenceThicknessTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_iridescence->iridescence_thickness_texture); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + 
return i; +} + +static int cgltf_parse_json_anisotropy(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_anisotropy* out_anisotropy) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int size = tokens[i].size; + ++i; + + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "anisotropyStrength") == 0) + { + ++i; + out_anisotropy->anisotropy_strength = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "anisotropyRotation") == 0) + { + ++i; + out_anisotropy->anisotropy_rotation = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "anisotropyTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, &out_anisotropy->anisotropy_texture); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_image(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_image* out_image) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "uri") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_image->uri); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "bufferView") == 0) + { + ++i; + out_image->buffer_view = CGLTF_PTRINDEX(cgltf_buffer_view, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "mimeType") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_image->mime_type); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_image->name); + } + else 
if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_image->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_image->extensions_count, &out_image->extensions); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_sampler(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_sampler* out_sampler) +{ + (void)options; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + out_sampler->wrap_s = 10497; + out_sampler->wrap_t = 10497; + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_sampler->name); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "magFilter") == 0) + { + ++i; + out_sampler->mag_filter + = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "minFilter") == 0) + { + ++i; + out_sampler->min_filter + = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "wrapS") == 0) + { + ++i; + out_sampler->wrap_s + = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "wrapT") == 0) + { + ++i; + out_sampler->wrap_t + = cgltf_json_to_int(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_sampler->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, 
&out_sampler->extensions_count, &out_sampler->extensions); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_texture(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_texture* out_texture) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_texture->name); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "sampler") == 0) + { + ++i; + out_texture->sampler = CGLTF_PTRINDEX(cgltf_sampler, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "source") == 0) + { + ++i; + out_texture->image = CGLTF_PTRINDEX(cgltf_image, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_texture->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if (out_texture->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + ++i; + out_texture->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + out_texture->extensions_count = 0; + + if (!out_texture->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_texture_basisu") == 0) + { + out_texture->has_basisu = 1; + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + int num_properties = tokens[i].size; + ++i; + + for (int t = 0; t < num_properties; ++t) + { + 
CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "source") == 0) + { + ++i; + out_texture->basisu_image = CGLTF_PTRINDEX(cgltf_image, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_texture->extensions[out_texture->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_material(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_material* out_material) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + cgltf_fill_float_array(out_material->pbr_metallic_roughness.base_color_factor, 4, 1.0f); + out_material->pbr_metallic_roughness.metallic_factor = 1.0f; + out_material->pbr_metallic_roughness.roughness_factor = 1.0f; + + cgltf_fill_float_array(out_material->pbr_specular_glossiness.diffuse_factor, 4, 1.0f); + cgltf_fill_float_array(out_material->pbr_specular_glossiness.specular_factor, 3, 1.0f); + out_material->pbr_specular_glossiness.glossiness_factor = 1.0f; + + cgltf_fill_float_array(out_material->volume.attenuation_color, 3, 1.0f); + out_material->volume.attenuation_distance = FLT_MAX; + + out_material->alpha_cutoff = 0.5f; + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_material->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "pbrMetallicRoughness") == 0) + { + out_material->has_pbr_metallic_roughness = 1; + i = cgltf_parse_json_pbr_metallic_roughness(options, tokens, i + 1, json_chunk, &out_material->pbr_metallic_roughness); + } + else if 
(cgltf_json_strcmp(tokens+i, json_chunk, "emissiveFactor") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_material->emissive_factor, 3); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "normalTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, + &out_material->normal_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "occlusionTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, + &out_material->occlusion_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "emissiveTexture") == 0) + { + i = cgltf_parse_json_texture_view(options, tokens, i + 1, json_chunk, + &out_material->emissive_texture); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "alphaMode") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens + i, json_chunk, "OPAQUE") == 0) + { + out_material->alpha_mode = cgltf_alpha_mode_opaque; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "MASK") == 0) + { + out_material->alpha_mode = cgltf_alpha_mode_mask; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "BLEND") == 0) + { + out_material->alpha_mode = cgltf_alpha_mode_blend; + } + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "alphaCutoff") == 0) + { + ++i; + out_material->alpha_cutoff = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "doubleSided") == 0) + { + ++i; + out_material->double_sided = + cgltf_json_to_bool(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_material->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_material->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + ++i; + 
out_material->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + out_material->extensions_count= 0; + + if (!out_material->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_pbrSpecularGlossiness") == 0) + { + out_material->has_pbr_specular_glossiness = 1; + i = cgltf_parse_json_pbr_specular_glossiness(options, tokens, i + 1, json_chunk, &out_material->pbr_specular_glossiness); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_unlit") == 0) + { + out_material->unlit = 1; + i = cgltf_skip_json(tokens, i+1); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_clearcoat") == 0) + { + out_material->has_clearcoat = 1; + i = cgltf_parse_json_clearcoat(options, tokens, i + 1, json_chunk, &out_material->clearcoat); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_ior") == 0) + { + out_material->has_ior = 1; + i = cgltf_parse_json_ior(tokens, i + 1, json_chunk, &out_material->ior); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_specular") == 0) + { + out_material->has_specular = 1; + i = cgltf_parse_json_specular(options, tokens, i + 1, json_chunk, &out_material->specular); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_transmission") == 0) + { + out_material->has_transmission = 1; + i = cgltf_parse_json_transmission(options, tokens, i + 1, json_chunk, &out_material->transmission); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_materials_volume") == 0) + { + out_material->has_volume = 1; + i = cgltf_parse_json_volume(options, tokens, i + 1, json_chunk, &out_material->volume); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_sheen") == 0) + { + out_material->has_sheen = 1; + i = cgltf_parse_json_sheen(options, tokens, i + 1, json_chunk, 
&out_material->sheen); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_materials_emissive_strength") == 0) + { + out_material->has_emissive_strength = 1; + i = cgltf_parse_json_emissive_strength(tokens, i + 1, json_chunk, &out_material->emissive_strength); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_materials_iridescence") == 0) + { + out_material->has_iridescence = 1; + i = cgltf_parse_json_iridescence(options, tokens, i + 1, json_chunk, &out_material->iridescence); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "KHR_materials_anisotropy") == 0) + { + out_material->has_anisotropy = 1; + i = cgltf_parse_json_anisotropy(options, tokens, i + 1, json_chunk, &out_material->anisotropy); + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_material->extensions[out_material->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_accessors(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_accessor), (void**)&out_data->accessors, &out_data->accessors_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->accessors_count; ++j) + { + i = cgltf_parse_json_accessor(options, tokens, i, json_chunk, &out_data->accessors[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_materials(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_material), (void**)&out_data->materials, &out_data->materials_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->materials_count; ++j) + { + i = 
cgltf_parse_json_material(options, tokens, i, json_chunk, &out_data->materials[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_images(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_image), (void**)&out_data->images, &out_data->images_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->images_count; ++j) + { + i = cgltf_parse_json_image(options, tokens, i, json_chunk, &out_data->images[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_textures(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_texture), (void**)&out_data->textures, &out_data->textures_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->textures_count; ++j) + { + i = cgltf_parse_json_texture(options, tokens, i, json_chunk, &out_data->textures[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_samplers(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_sampler), (void**)&out_data->samplers, &out_data->samplers_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->samplers_count; ++j) + { + i = cgltf_parse_json_sampler(options, tokens, i, json_chunk, &out_data->samplers[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_meshopt_compression(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_meshopt_compression* out_meshopt_compression) +{ + (void)options; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + 
+ for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "buffer") == 0) + { + ++i; + out_meshopt_compression->buffer = CGLTF_PTRINDEX(cgltf_buffer, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0) + { + ++i; + out_meshopt_compression->offset = cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0) + { + ++i; + out_meshopt_compression->size = cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0) + { + ++i; + out_meshopt_compression->stride = cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "count") == 0) + { + ++i; + out_meshopt_compression->count = cgltf_json_to_int(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "mode") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens+i, json_chunk, "ATTRIBUTES") == 0) + { + out_meshopt_compression->mode = cgltf_meshopt_compression_mode_attributes; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "TRIANGLES") == 0) + { + out_meshopt_compression->mode = cgltf_meshopt_compression_mode_triangles; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "INDICES") == 0) + { + out_meshopt_compression->mode = cgltf_meshopt_compression_mode_indices; + } + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "filter") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens+i, json_chunk, "NONE") == 0) + { + out_meshopt_compression->filter = cgltf_meshopt_compression_filter_none; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "OCTAHEDRAL") == 0) + { + out_meshopt_compression->filter = cgltf_meshopt_compression_filter_octahedral; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "QUATERNION") == 0) + { + out_meshopt_compression->filter = 
cgltf_meshopt_compression_filter_quaternion; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "EXPONENTIAL") == 0) + { + out_meshopt_compression->filter = cgltf_meshopt_compression_filter_exponential; + } + ++i; + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_buffer_view(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_buffer_view* out_buffer_view) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_buffer_view->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "buffer") == 0) + { + ++i; + out_buffer_view->buffer = CGLTF_PTRINDEX(cgltf_buffer, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteOffset") == 0) + { + ++i; + out_buffer_view->offset = + cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0) + { + ++i; + out_buffer_view->size = + cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteStride") == 0) + { + ++i; + out_buffer_view->stride = + cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "target") == 0) + { + ++i; + int type = cgltf_json_to_int(tokens+i, json_chunk); + switch (type) + { + case 34962: + type = cgltf_buffer_view_type_vertices; + break; + case 34963: + type = cgltf_buffer_view_type_indices; + break; + default: + type = cgltf_buffer_view_type_invalid; + break; + } + out_buffer_view->type = (cgltf_buffer_view_type)type; + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = 
cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_buffer_view->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_buffer_view->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + out_buffer_view->extensions_count = 0; + out_buffer_view->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!out_buffer_view->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "EXT_meshopt_compression") == 0) + { + out_buffer_view->has_meshopt_compression = 1; + i = cgltf_parse_json_meshopt_compression(options, tokens, i + 1, json_chunk, &out_buffer_view->meshopt_compression); + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_buffer_view->extensions[out_buffer_view->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_buffer_views(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_buffer_view), (void**)&out_data->buffer_views, &out_data->buffer_views_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->buffer_views_count; ++j) + { + i = cgltf_parse_json_buffer_view(options, tokens, i, json_chunk, &out_data->buffer_views[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_buffer(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_buffer* out_buffer) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = 
tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_buffer->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "byteLength") == 0) + { + ++i; + out_buffer->size = + cgltf_json_to_size(tokens+i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "uri") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_buffer->uri); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_buffer->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_buffer->extensions_count, &out_buffer->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_buffers(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_buffer), (void**)&out_data->buffers, &out_data->buffers_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->buffers_count; ++j) + { + i = cgltf_parse_json_buffer(options, tokens, i, json_chunk, &out_data->buffers[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_skin(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_skin* out_skin) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, 
json_chunk, &out_skin->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "joints") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_node*), (void**)&out_skin->joints, &out_skin->joints_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_skin->joints_count; ++k) + { + out_skin->joints[k] = CGLTF_PTRINDEX(cgltf_node, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "skeleton") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_skin->skeleton = CGLTF_PTRINDEX(cgltf_node, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "inverseBindMatrices") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_skin->inverse_bind_matrices = CGLTF_PTRINDEX(cgltf_accessor, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_skin->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_skin->extensions_count, &out_skin->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_skins(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_skin), (void**)&out_data->skins, &out_data->skins_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->skins_count; ++j) + { + i = cgltf_parse_json_skin(options, tokens, i, json_chunk, &out_data->skins[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_camera(cgltf_options* options, 
jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_camera* out_camera) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_camera->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "perspective") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int data_size = tokens[i].size; + ++i; + + if (out_camera->type != cgltf_camera_type_invalid) + { + return CGLTF_ERROR_JSON; + } + + out_camera->type = cgltf_camera_type_perspective; + + for (int k = 0; k < data_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "aspectRatio") == 0) + { + ++i; + out_camera->data.perspective.has_aspect_ratio = 1; + out_camera->data.perspective.aspect_ratio = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "yfov") == 0) + { + ++i; + out_camera->data.perspective.yfov = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "zfar") == 0) + { + ++i; + out_camera->data.perspective.has_zfar = 1; + out_camera->data.perspective.zfar = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "znear") == 0) + { + ++i; + out_camera->data.perspective.znear = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_camera->data.perspective.extras); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "orthographic") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int 
data_size = tokens[i].size; + ++i; + + if (out_camera->type != cgltf_camera_type_invalid) + { + return CGLTF_ERROR_JSON; + } + + out_camera->type = cgltf_camera_type_orthographic; + + for (int k = 0; k < data_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "xmag") == 0) + { + ++i; + out_camera->data.orthographic.xmag = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "ymag") == 0) + { + ++i; + out_camera->data.orthographic.ymag = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "zfar") == 0) + { + ++i; + out_camera->data.orthographic.zfar = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "znear") == 0) + { + ++i; + out_camera->data.orthographic.znear = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_camera->data.orthographic.extras); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_camera->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_camera->extensions_count, &out_camera->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_cameras(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_camera), (void**)&out_data->cameras, &out_data->cameras_count); + if (i < 0) 
+ { + return i; + } + + for (cgltf_size j = 0; j < out_data->cameras_count; ++j) + { + i = cgltf_parse_json_camera(options, tokens, i, json_chunk, &out_data->cameras[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_light(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_light* out_light) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + out_light->color[0] = 1.f; + out_light->color[1] = 1.f; + out_light->color[2] = 1.f; + out_light->intensity = 1.f; + + out_light->spot_inner_cone_angle = 0.f; + out_light->spot_outer_cone_angle = 3.1415926535f / 4.0f; + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_light->name); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "color") == 0) + { + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_light->color, 3); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "intensity") == 0) + { + ++i; + out_light->intensity = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "type") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens + i, json_chunk, "directional") == 0) + { + out_light->type = cgltf_light_type_directional; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "point") == 0) + { + out_light->type = cgltf_light_type_point; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "spot") == 0) + { + out_light->type = cgltf_light_type_spot; + } + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "range") == 0) + { + ++i; + out_light->range = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "spot") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int data_size = tokens[i].size; + 
++i; + + for (int k = 0; k < data_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "innerConeAngle") == 0) + { + ++i; + out_light->spot_inner_cone_angle = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "outerConeAngle") == 0) + { + ++i; + out_light->spot_outer_cone_angle = cgltf_json_to_float(tokens + i, json_chunk); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_light->extras); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_lights(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_light), (void**)&out_data->lights, &out_data->lights_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->lights_count; ++j) + { + i = cgltf_parse_json_light(options, tokens, i, json_chunk, &out_data->lights[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_node(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_node* out_node) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + out_node->rotation[3] = 1.0f; + out_node->scale[0] = 1.0f; + out_node->scale[1] = 1.0f; + out_node->scale[2] = 1.0f; + out_node->matrix[0] = 1.0f; + out_node->matrix[5] = 1.0f; + out_node->matrix[10] = 1.0f; + out_node->matrix[15] = 1.0f; + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, 
&out_node->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "children") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_node*), (void**)&out_node->children, &out_node->children_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_node->children_count; ++k) + { + out_node->children[k] = CGLTF_PTRINDEX(cgltf_node, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "mesh") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_node->mesh = CGLTF_PTRINDEX(cgltf_mesh, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "skin") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_node->skin = CGLTF_PTRINDEX(cgltf_skin, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "camera") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_node->camera = CGLTF_PTRINDEX(cgltf_camera, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "translation") == 0) + { + out_node->has_translation = 1; + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_node->translation, 3); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "rotation") == 0) + { + out_node->has_rotation = 1; + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_node->rotation, 4); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "scale") == 0) + { + out_node->has_scale = 1; + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_node->scale, 3); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "matrix") == 0) + { + out_node->has_matrix = 1; + i = cgltf_parse_json_float_array(tokens, i + 1, json_chunk, out_node->matrix, 16); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "weights") == 0) + { + i = 
cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_float), (void**)&out_node->weights, &out_node->weights_count); + if (i < 0) + { + return i; + } + + i = cgltf_parse_json_float_array(tokens, i - 1, json_chunk, out_node->weights, (int)out_node->weights_count); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_node->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_node->extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + out_node->extensions_count= 0; + out_node->extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!out_node->extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_lights_punctual") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int data_size = tokens[i].size; + ++i; + + for (int m = 0; m < data_size; ++m) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "light") == 0) + { + ++i; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_PRIMITIVE); + out_node->light = CGLTF_PTRINDEX(cgltf_light, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "EXT_mesh_gpu_instancing") == 0) + { + out_node->has_mesh_gpu_instancing = 1; + i = cgltf_parse_json_mesh_gpu_instancing(options, tokens, i + 1, json_chunk, &out_node->mesh_gpu_instancing); + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_node->extensions[out_node->extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } 
+ else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_nodes(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_node), (void**)&out_data->nodes, &out_data->nodes_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->nodes_count; ++j) + { + i = cgltf_parse_json_node(options, tokens, i, json_chunk, &out_data->nodes[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_scene(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_scene* out_scene) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_scene->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "nodes") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_node*), (void**)&out_scene->nodes, &out_scene->nodes_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_scene->nodes_count; ++k) + { + out_scene->nodes[k] = CGLTF_PTRINDEX(cgltf_node, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_scene->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_scene->extensions_count, &out_scene->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int 
cgltf_parse_json_scenes(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_scene), (void**)&out_data->scenes, &out_data->scenes_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->scenes_count; ++j) + { + i = cgltf_parse_json_scene(options, tokens, i, json_chunk, &out_data->scenes[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_animation_sampler(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_animation_sampler* out_sampler) +{ + (void)options; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "input") == 0) + { + ++i; + out_sampler->input = CGLTF_PTRINDEX(cgltf_accessor, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "output") == 0) + { + ++i; + out_sampler->output = CGLTF_PTRINDEX(cgltf_accessor, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "interpolation") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens + i, json_chunk, "LINEAR") == 0) + { + out_sampler->interpolation = cgltf_interpolation_type_linear; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "STEP") == 0) + { + out_sampler->interpolation = cgltf_interpolation_type_step; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "CUBICSPLINE") == 0) + { + out_sampler->interpolation = cgltf_interpolation_type_cubic_spline; + } + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_sampler->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = 
cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_sampler->extensions_count, &out_sampler->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_animation_channel(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_animation_channel* out_channel) +{ + (void)options; + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "sampler") == 0) + { + ++i; + out_channel->sampler = CGLTF_PTRINDEX(cgltf_animation_sampler, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "target") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int target_size = tokens[i].size; + ++i; + + for (int k = 0; k < target_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "node") == 0) + { + ++i; + out_channel->target_node = CGLTF_PTRINDEX(cgltf_node, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "path") == 0) + { + ++i; + if (cgltf_json_strcmp(tokens+i, json_chunk, "translation") == 0) + { + out_channel->target_path = cgltf_animation_path_type_translation; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "rotation") == 0) + { + out_channel->target_path = cgltf_animation_path_type_rotation; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "scale") == 0) + { + out_channel->target_path = cgltf_animation_path_type_scale; + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "weights") == 0) + { + out_channel->target_path = cgltf_animation_path_type_weights; + } + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, 
json_chunk, &out_channel->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_channel->extensions_count, &out_channel->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_animation(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_animation* out_animation) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_animation->name); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "samplers") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_animation_sampler), (void**)&out_animation->samplers, &out_animation->samplers_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_animation->samplers_count; ++k) + { + i = cgltf_parse_json_animation_sampler(options, tokens, i, json_chunk, &out_animation->samplers[k]); + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "channels") == 0) + { + i = cgltf_parse_json_array(options, tokens, i + 1, json_chunk, sizeof(cgltf_animation_channel), (void**)&out_animation->channels, &out_animation->channels_count); + if (i < 0) + { + return i; + } + + for (cgltf_size k = 0; k < out_animation->channels_count; ++k) + { + i = cgltf_parse_json_animation_channel(options, tokens, i, json_chunk, &out_animation->channels[k]); + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = 
cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_animation->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_animation->extensions_count, &out_animation->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_animations(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_animation), (void**)&out_data->animations, &out_data->animations_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->animations_count; ++j) + { + i = cgltf_parse_json_animation(options, tokens, i, json_chunk, &out_data->animations[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_variant(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_material_variant* out_variant) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "name") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_variant->name); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_variant->extras); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +static int cgltf_parse_json_variants(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + i = cgltf_parse_json_array(options, tokens, i, json_chunk, sizeof(cgltf_material_variant), (void**)&out_data->variants, 
&out_data->variants_count); + if (i < 0) + { + return i; + } + + for (cgltf_size j = 0; j < out_data->variants_count; ++j) + { + i = cgltf_parse_json_variant(options, tokens, i, json_chunk, &out_data->variants[j]); + if (i < 0) + { + return i; + } + } + return i; +} + +static int cgltf_parse_json_asset(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_asset* out_asset) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "copyright") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_asset->copyright); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "generator") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_asset->generator); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "version") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_asset->version); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "minVersion") == 0) + { + i = cgltf_parse_json_string(options, tokens, i + 1, json_chunk, &out_asset->min_version); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_asset->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensions") == 0) + { + i = cgltf_parse_json_unprocessed_extensions(options, tokens, i, json_chunk, &out_asset->extensions_count, &out_asset->extensions); + } + else + { + i = cgltf_skip_json(tokens, i+1); + } + + if (i < 0) + { + return i; + } + } + + if (out_asset->version && CGLTF_ATOF(out_asset->version) < 2) + { + return CGLTF_ERROR_LEGACY; + } + + return i; +} + +cgltf_size cgltf_num_components(cgltf_type type) { + switch (type) + { + case cgltf_type_vec2: + return 2; + case cgltf_type_vec3: + return 3; + case cgltf_type_vec4: + 
return 4; + case cgltf_type_mat2: + return 4; + case cgltf_type_mat3: + return 9; + case cgltf_type_mat4: + return 16; + case cgltf_type_invalid: + case cgltf_type_scalar: + default: + return 1; + } +} + +cgltf_size cgltf_component_size(cgltf_component_type component_type) { + switch (component_type) + { + case cgltf_component_type_r_8: + case cgltf_component_type_r_8u: + return 1; + case cgltf_component_type_r_16: + case cgltf_component_type_r_16u: + return 2; + case cgltf_component_type_r_32u: + case cgltf_component_type_r_32f: + return 4; + case cgltf_component_type_invalid: + default: + return 0; + } +} + +cgltf_size cgltf_calc_size(cgltf_type type, cgltf_component_type component_type) +{ + cgltf_size component_size = cgltf_component_size(component_type); + if (type == cgltf_type_mat2 && component_size == 1) + { + return 8 * component_size; + } + else if (type == cgltf_type_mat3 && (component_size == 1 || component_size == 2)) + { + return 12 * component_size; + } + return component_size * cgltf_num_components(type); +} + +static int cgltf_fixup_pointers(cgltf_data* out_data); + +static int cgltf_parse_json_root(cgltf_options* options, jsmntok_t const* tokens, int i, const uint8_t* json_chunk, cgltf_data* out_data) +{ + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int size = tokens[i].size; + ++i; + + for (int j = 0; j < size; ++j) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "asset") == 0) + { + i = cgltf_parse_json_asset(options, tokens, i + 1, json_chunk, &out_data->asset); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "meshes") == 0) + { + i = cgltf_parse_json_meshes(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "accessors") == 0) + { + i = cgltf_parse_json_accessors(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "bufferViews") == 0) + { + i = cgltf_parse_json_buffer_views(options, tokens, i 
+ 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "buffers") == 0) + { + i = cgltf_parse_json_buffers(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "materials") == 0) + { + i = cgltf_parse_json_materials(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "images") == 0) + { + i = cgltf_parse_json_images(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "textures") == 0) + { + i = cgltf_parse_json_textures(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "samplers") == 0) + { + i = cgltf_parse_json_samplers(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "skins") == 0) + { + i = cgltf_parse_json_skins(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "cameras") == 0) + { + i = cgltf_parse_json_cameras(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "nodes") == 0) + { + i = cgltf_parse_json_nodes(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "scenes") == 0) + { + i = cgltf_parse_json_scenes(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "scene") == 0) + { + ++i; + out_data->scene = CGLTF_PTRINDEX(cgltf_scene, cgltf_json_to_int(tokens + i, json_chunk)); + ++i; + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "animations") == 0) + { + i = cgltf_parse_json_animations(options, tokens, i + 1, json_chunk, out_data); + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "extras") == 0) + { + i = cgltf_parse_json_extras(options, tokens, i + 1, json_chunk, &out_data->extras); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, 
"extensions") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + if(out_data->data_extensions) + { + return CGLTF_ERROR_JSON; + } + + int extensions_size = tokens[i].size; + out_data->data_extensions_count = 0; + out_data->data_extensions = (cgltf_extension*)cgltf_calloc(options, sizeof(cgltf_extension), extensions_size); + + if (!out_data->data_extensions) + { + return CGLTF_ERROR_NOMEM; + } + + ++i; + + for (int k = 0; k < extensions_size; ++k) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_lights_punctual") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int data_size = tokens[i].size; + ++i; + + for (int m = 0; m < data_size; ++m) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "lights") == 0) + { + i = cgltf_parse_json_lights(options, tokens, i + 1, json_chunk, out_data); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens+i, json_chunk, "KHR_materials_variants") == 0) + { + ++i; + + CGLTF_CHECK_TOKTYPE(tokens[i], JSMN_OBJECT); + + int data_size = tokens[i].size; + ++i; + + for (int m = 0; m < data_size; ++m) + { + CGLTF_CHECK_KEY(tokens[i]); + + if (cgltf_json_strcmp(tokens + i, json_chunk, "variants") == 0) + { + i = cgltf_parse_json_variants(options, tokens, i + 1, json_chunk, out_data); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + } + else + { + i = cgltf_parse_json_unprocessed_extension(options, tokens, i, json_chunk, &(out_data->data_extensions[out_data->data_extensions_count++])); + } + + if (i < 0) + { + return i; + } + } + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, "extensionsUsed") == 0) + { + i = cgltf_parse_json_string_array(options, tokens, i + 1, json_chunk, &out_data->extensions_used, &out_data->extensions_used_count); + } + else if (cgltf_json_strcmp(tokens + i, json_chunk, 
"extensionsRequired") == 0) + { + i = cgltf_parse_json_string_array(options, tokens, i + 1, json_chunk, &out_data->extensions_required, &out_data->extensions_required_count); + } + else + { + i = cgltf_skip_json(tokens, i + 1); + } + + if (i < 0) + { + return i; + } + } + + return i; +} + +cgltf_result cgltf_parse_json(cgltf_options* options, const uint8_t* json_chunk, cgltf_size size, cgltf_data** out_data) +{ + jsmn_parser parser = { 0, 0, 0 }; + + if (options->json_token_count == 0) + { + int token_count = jsmn_parse(&parser, (const char*)json_chunk, size, NULL, 0); + + if (token_count <= 0) + { + return cgltf_result_invalid_json; + } + + options->json_token_count = token_count; + } + + jsmntok_t* tokens = (jsmntok_t*)options->memory.alloc_func(options->memory.user_data, sizeof(jsmntok_t) * (options->json_token_count + 1)); + + if (!tokens) + { + return cgltf_result_out_of_memory; + } + + jsmn_init(&parser); + + int token_count = jsmn_parse(&parser, (const char*)json_chunk, size, tokens, options->json_token_count); + + if (token_count <= 0) + { + options->memory.free_func(options->memory.user_data, tokens); + return cgltf_result_invalid_json; + } + + // this makes sure that we always have an UNDEFINED token at the end of the stream + // for invalid JSON inputs this makes sure we don't perform out of bound reads of token data + tokens[token_count].type = JSMN_UNDEFINED; + + cgltf_data* data = (cgltf_data*)options->memory.alloc_func(options->memory.user_data, sizeof(cgltf_data)); + + if (!data) + { + options->memory.free_func(options->memory.user_data, tokens); + return cgltf_result_out_of_memory; + } + + memset(data, 0, sizeof(cgltf_data)); + data->memory = options->memory; + data->file = options->file; + + int i = cgltf_parse_json_root(options, tokens, 0, json_chunk, data); + + options->memory.free_func(options->memory.user_data, tokens); + + if (i < 0) + { + cgltf_free(data); + + switch (i) + { + case CGLTF_ERROR_NOMEM: return cgltf_result_out_of_memory; + case 
CGLTF_ERROR_LEGACY: return cgltf_result_legacy_gltf; + default: return cgltf_result_invalid_gltf; + } + } + + if (cgltf_fixup_pointers(data) < 0) + { + cgltf_free(data); + return cgltf_result_invalid_gltf; + } + + data->json = (const char*)json_chunk; + data->json_size = size; + + *out_data = data; + + return cgltf_result_success; +} + +static int cgltf_fixup_pointers(cgltf_data* data) +{ + for (cgltf_size i = 0; i < data->meshes_count; ++i) + { + for (cgltf_size j = 0; j < data->meshes[i].primitives_count; ++j) + { + CGLTF_PTRFIXUP(data->meshes[i].primitives[j].indices, data->accessors, data->accessors_count); + CGLTF_PTRFIXUP(data->meshes[i].primitives[j].material, data->materials, data->materials_count); + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].attributes_count; ++k) + { + CGLTF_PTRFIXUP_REQ(data->meshes[i].primitives[j].attributes[k].data, data->accessors, data->accessors_count); + } + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].targets_count; ++k) + { + for (cgltf_size m = 0; m < data->meshes[i].primitives[j].targets[k].attributes_count; ++m) + { + CGLTF_PTRFIXUP_REQ(data->meshes[i].primitives[j].targets[k].attributes[m].data, data->accessors, data->accessors_count); + } + } + + if (data->meshes[i].primitives[j].has_draco_mesh_compression) + { + CGLTF_PTRFIXUP_REQ(data->meshes[i].primitives[j].draco_mesh_compression.buffer_view, data->buffer_views, data->buffer_views_count); + for (cgltf_size m = 0; m < data->meshes[i].primitives[j].draco_mesh_compression.attributes_count; ++m) + { + CGLTF_PTRFIXUP_REQ(data->meshes[i].primitives[j].draco_mesh_compression.attributes[m].data, data->accessors, data->accessors_count); + } + } + + for (cgltf_size k = 0; k < data->meshes[i].primitives[j].mappings_count; ++k) + { + CGLTF_PTRFIXUP_REQ(data->meshes[i].primitives[j].mappings[k].material, data->materials, data->materials_count); + } + } + } + + for (cgltf_size i = 0; i < data->accessors_count; ++i) + { + 
CGLTF_PTRFIXUP(data->accessors[i].buffer_view, data->buffer_views, data->buffer_views_count); + + if (data->accessors[i].is_sparse) + { + CGLTF_PTRFIXUP_REQ(data->accessors[i].sparse.indices_buffer_view, data->buffer_views, data->buffer_views_count); + CGLTF_PTRFIXUP_REQ(data->accessors[i].sparse.values_buffer_view, data->buffer_views, data->buffer_views_count); + } + + if (data->accessors[i].buffer_view) + { + data->accessors[i].stride = data->accessors[i].buffer_view->stride; + } + + if (data->accessors[i].stride == 0) + { + data->accessors[i].stride = cgltf_calc_size(data->accessors[i].type, data->accessors[i].component_type); + } + } + + for (cgltf_size i = 0; i < data->textures_count; ++i) + { + CGLTF_PTRFIXUP(data->textures[i].image, data->images, data->images_count); + CGLTF_PTRFIXUP(data->textures[i].basisu_image, data->images, data->images_count); + CGLTF_PTRFIXUP(data->textures[i].sampler, data->samplers, data->samplers_count); + } + + for (cgltf_size i = 0; i < data->images_count; ++i) + { + CGLTF_PTRFIXUP(data->images[i].buffer_view, data->buffer_views, data->buffer_views_count); + } + + for (cgltf_size i = 0; i < data->materials_count; ++i) + { + CGLTF_PTRFIXUP(data->materials[i].normal_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].emissive_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].occlusion_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].pbr_metallic_roughness.base_color_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].pbr_metallic_roughness.metallic_roughness_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].pbr_specular_glossiness.diffuse_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].pbr_specular_glossiness.specular_glossiness_texture.texture, data->textures, 
data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].clearcoat.clearcoat_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].clearcoat.clearcoat_roughness_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].clearcoat.clearcoat_normal_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].specular.specular_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].specular.specular_color_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].transmission.transmission_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].volume.thickness_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].sheen.sheen_color_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].sheen.sheen_roughness_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].iridescence.iridescence_texture.texture, data->textures, data->textures_count); + CGLTF_PTRFIXUP(data->materials[i].iridescence.iridescence_thickness_texture.texture, data->textures, data->textures_count); + + CGLTF_PTRFIXUP(data->materials[i].anisotropy.anisotropy_texture.texture, data->textures, data->textures_count); + } + + for (cgltf_size i = 0; i < data->buffer_views_count; ++i) + { + CGLTF_PTRFIXUP_REQ(data->buffer_views[i].buffer, data->buffers, data->buffers_count); + + if (data->buffer_views[i].has_meshopt_compression) + { + CGLTF_PTRFIXUP_REQ(data->buffer_views[i].meshopt_compression.buffer, data->buffers, data->buffers_count); + } + } + + for (cgltf_size i = 0; i < data->skins_count; ++i) + { + for (cgltf_size j = 0; j < data->skins[i].joints_count; ++j) + { + CGLTF_PTRFIXUP_REQ(data->skins[i].joints[j], data->nodes, data->nodes_count); + } + + 
CGLTF_PTRFIXUP(data->skins[i].skeleton, data->nodes, data->nodes_count); + CGLTF_PTRFIXUP(data->skins[i].inverse_bind_matrices, data->accessors, data->accessors_count); + } + + for (cgltf_size i = 0; i < data->nodes_count; ++i) + { + for (cgltf_size j = 0; j < data->nodes[i].children_count; ++j) + { + CGLTF_PTRFIXUP_REQ(data->nodes[i].children[j], data->nodes, data->nodes_count); + + if (data->nodes[i].children[j]->parent) + { + return CGLTF_ERROR_JSON; + } + + data->nodes[i].children[j]->parent = &data->nodes[i]; + } + + CGLTF_PTRFIXUP(data->nodes[i].mesh, data->meshes, data->meshes_count); + CGLTF_PTRFIXUP(data->nodes[i].skin, data->skins, data->skins_count); + CGLTF_PTRFIXUP(data->nodes[i].camera, data->cameras, data->cameras_count); + CGLTF_PTRFIXUP(data->nodes[i].light, data->lights, data->lights_count); + + if (data->nodes[i].has_mesh_gpu_instancing) + { + for (cgltf_size m = 0; m < data->nodes[i].mesh_gpu_instancing.attributes_count; ++m) + { + CGLTF_PTRFIXUP_REQ(data->nodes[i].mesh_gpu_instancing.attributes[m].data, data->accessors, data->accessors_count); + } + } + } + + for (cgltf_size i = 0; i < data->scenes_count; ++i) + { + for (cgltf_size j = 0; j < data->scenes[i].nodes_count; ++j) + { + CGLTF_PTRFIXUP_REQ(data->scenes[i].nodes[j], data->nodes, data->nodes_count); + + if (data->scenes[i].nodes[j]->parent) + { + return CGLTF_ERROR_JSON; + } + } + } + + CGLTF_PTRFIXUP(data->scene, data->scenes, data->scenes_count); + + for (cgltf_size i = 0; i < data->animations_count; ++i) + { + for (cgltf_size j = 0; j < data->animations[i].samplers_count; ++j) + { + CGLTF_PTRFIXUP_REQ(data->animations[i].samplers[j].input, data->accessors, data->accessors_count); + CGLTF_PTRFIXUP_REQ(data->animations[i].samplers[j].output, data->accessors, data->accessors_count); + } + + for (cgltf_size j = 0; j < data->animations[i].channels_count; ++j) + { + CGLTF_PTRFIXUP_REQ(data->animations[i].channels[j].sampler, data->animations[i].samplers, 
data->animations[i].samplers_count); + CGLTF_PTRFIXUP(data->animations[i].channels[j].target_node, data->nodes, data->nodes_count); + } + } + + return 0; +} + +/* + * -- jsmn.c start -- + * Source: https://github.com/zserge/jsmn + * License: MIT + * + * Copyright (c) 2010 Serge A. Zaitsev + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/** + * Allocates a fresh unused token from the token pull. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *tok; + if (parser->toknext >= num_tokens) { + return NULL; + } + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +/** + * Fills token type and boundaries. 
+ */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + ptrdiff_t start, ptrdiff_t end) { + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/** + * Fills next available token with JSON primitive. + */ +static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + ptrdiff_t start; + + start = parser->pos; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* In strict mode primitive must be followed by "," or "}" or "]" */ + case ':': +#endif + case '\t' : case '\r' : case '\n' : case ' ' : + case ',' : case ']' : case '}' : + goto found; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* In strict mode primitive must be followed by a comma/object/array */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + +found: + if (tokens == NULL) { + parser->pos--; + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; +} + +/** + * Fills next token with JSON string. 
+ */ +static int jsmn_parse_string(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + + ptrdiff_t start = parser->pos; + + parser->pos++; + + /* Skip starting quote */ + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + if (tokens == NULL) { + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start+1, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + return 0; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\' && parser->pos + 1 < len) { + int i; + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': case '/' : case '\\' : case 'b' : + case 'f' : case 'r' : case 'n' : case 't' : + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + parser->pos++; + for(i = 0; i < 4 && parser->pos < len && js[parser->pos] != '\0'; i++) { + /* If it isn't a hex character we have an error */ + if(!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */ + (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */ + (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */ + parser->pos = start; + return JSMN_ERROR_INVAL; + } + parser->pos++; + } + parser->pos--; + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/** + * Parse JSON string and fill tokens. 
+ */ +static int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, size_t num_tokens) { + int r; + int i; + jsmntok_t *token; + int count = parser->toknext; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': case '[': + count++; + if (tokens == NULL) { + break; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) + return JSMN_ERROR_NOMEM; + if (parser->toksuper != -1) { + tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + } + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': case ']': + if (tokens == NULL) + break; + type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); +#ifdef JSMN_PARENT_LINKS + if (parser->toknext < 1) { + return JSMN_ERROR_INVAL; + } + token = &tokens[parser->toknext - 1]; + for (;;) { + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = parser->pos + 1; + parser->toksuper = token->parent; + break; + } + if (token->parent == -1) { + if(token->type != type || parser->toksuper == -1) { + return JSMN_ERROR_INVAL; + } + break; + } + token = &tokens[token->parent]; + } +#else + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) return JSMN_ERROR_INVAL; + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } +#endif + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if 
(parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + case '\t' : case '\r' : case '\n' : case ' ': + break; + case ':': + parser->toksuper = parser->toknext - 1; + break; + case ',': + if (tokens != NULL && parser->toksuper != -1 && + tokens[parser->toksuper].type != JSMN_ARRAY && + tokens[parser->toksuper].type != JSMN_OBJECT) { +#ifdef JSMN_PARENT_LINKS + parser->toksuper = tokens[parser->toksuper].parent; +#else + for (i = parser->toknext - 1; i >= 0; i--) { + if (tokens[i].type == JSMN_ARRAY || tokens[i].type == JSMN_OBJECT) { + if (tokens[i].start != -1 && tokens[i].end == -1) { + parser->toksuper = i; + break; + } + } + } +#endif + } + break; +#ifdef JSMN_STRICT + /* In strict mode primitives are: numbers and booleans */ + case '-': case '0': case '1' : case '2': case '3' : case '4': + case '5': case '6': case '7' : case '8': case '9': + case 't': case 'f': case 'n' : + /* And they must not be keys of the object */ + if (tokens != NULL && parser->toksuper != -1) { + jsmntok_t *t = &tokens[parser->toksuper]; + if (t->type == JSMN_OBJECT || + (t->type == JSMN_STRING && t->size != 0)) { + return JSMN_ERROR_INVAL; + } + } +#else + /* In non-strict mode every unquoted value is a primitive */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + if (tokens != NULL) { + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) { + return JSMN_ERROR_PART; + } + } + } + + return count; +} + +/** + * Creates a new parser based over a given buffer with an array of tokens + * available. 
+ */ +static void jsmn_init(jsmn_parser *parser) { + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} +/* + * -- jsmn.c end -- + */ + +#endif /* #ifdef CGLTF_IMPLEMENTATION */ + +/* cgltf is distributed under MIT license: + * + * Copyright (c) 2018-2021 Johannes Kuhlmann + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ diff --git a/libkram/cgltf/cgltf_write.h b/libkram/cgltf/cgltf_write.h new file mode 100644 index 00000000..be22b888 --- /dev/null +++ b/libkram/cgltf/cgltf_write.h @@ -0,0 +1,1506 @@ +/** + * cgltf_write - a single-file glTF 2.0 writer written in C99. + * + * Version: 1.13 + * + * Website: https://github.com/jkuhlmann/cgltf + * + * Distributed under the MIT License, see notice at the end of this file. + * + * Building: + * Include this file where you need the struct and function + * declarations. 
Have exactly one source file where you define + * `CGLTF_WRITE_IMPLEMENTATION` before including this file to get the + * function definitions. + * + * Reference: + * `cgltf_result cgltf_write_file(const cgltf_options* options, const char* + * path, const cgltf_data* data)` writes a glTF data to the given file path. + * If `options->type` is `cgltf_file_type_glb`, both JSON content and binary + * buffer of the given glTF data will be written in a GLB format. + * Otherwise, only the JSON part will be written. + * External buffers and images are not written out. `data` is not deallocated. + * + * `cgltf_size cgltf_write(const cgltf_options* options, char* buffer, + * cgltf_size size, const cgltf_data* data)` writes JSON into the given memory + * buffer. Returns the number of bytes written to `buffer`, including a null + * terminator. If buffer is null, returns the number of bytes that would have + * been written. `data` is not deallocated. + */ +#ifndef CGLTF_WRITE_H_INCLUDED__ +#define CGLTF_WRITE_H_INCLUDED__ + +#include "cgltf.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +cgltf_result cgltf_write_file(const cgltf_options* options, const char* path, const cgltf_data* data); +cgltf_size cgltf_write(const cgltf_options* options, char* buffer, cgltf_size size, const cgltf_data* data); + +#ifdef __cplusplus +} +#endif + +#endif /* #ifndef CGLTF_WRITE_H_INCLUDED__ */ + +/* + * + * Stop now, if you are only interested in the API. + * Below, you find the implementation. + * + */ + +#if defined(__INTELLISENSE__) || defined(__JETBRAINS_IDE__) +/* This makes MSVC/CLion intellisense work. 
*/ +#define CGLTF_WRITE_IMPLEMENTATION +#endif + +#ifdef CGLTF_WRITE_IMPLEMENTATION + +#include +#include +#include +#include +#include +#include + +#define CGLTF_EXTENSION_FLAG_TEXTURE_TRANSFORM (1 << 0) +#define CGLTF_EXTENSION_FLAG_MATERIALS_UNLIT (1 << 1) +#define CGLTF_EXTENSION_FLAG_SPECULAR_GLOSSINESS (1 << 2) +#define CGLTF_EXTENSION_FLAG_LIGHTS_PUNCTUAL (1 << 3) +#define CGLTF_EXTENSION_FLAG_DRACO_MESH_COMPRESSION (1 << 4) +#define CGLTF_EXTENSION_FLAG_MATERIALS_CLEARCOAT (1 << 5) +#define CGLTF_EXTENSION_FLAG_MATERIALS_IOR (1 << 6) +#define CGLTF_EXTENSION_FLAG_MATERIALS_SPECULAR (1 << 7) +#define CGLTF_EXTENSION_FLAG_MATERIALS_TRANSMISSION (1 << 8) +#define CGLTF_EXTENSION_FLAG_MATERIALS_SHEEN (1 << 9) +#define CGLTF_EXTENSION_FLAG_MATERIALS_VARIANTS (1 << 10) +#define CGLTF_EXTENSION_FLAG_MATERIALS_VOLUME (1 << 11) +#define CGLTF_EXTENSION_FLAG_TEXTURE_BASISU (1 << 12) +#define CGLTF_EXTENSION_FLAG_MATERIALS_EMISSIVE_STRENGTH (1 << 13) +#define CGLTF_EXTENSION_FLAG_MESH_GPU_INSTANCING (1 << 14) +#define CGLTF_EXTENSION_FLAG_MATERIALS_IRIDESCENCE (1 << 15) +#define CGLTF_EXTENSION_FLAG_MATERIALS_ANISOTROPY (1 << 16) + +typedef struct { + char* buffer; + cgltf_size buffer_size; + cgltf_size remaining; + char* cursor; + cgltf_size tmp; + cgltf_size chars_written; + const cgltf_data* data; + int depth; + const char* indent; + int needs_comma; + uint32_t extension_flags; + uint32_t required_extension_flags; +} cgltf_write_context; + +#define CGLTF_MIN(a, b) (a < b ? a : b) + +#ifdef FLT_DECIMAL_DIG + // FLT_DECIMAL_DIG is C11 + #define CGLTF_DECIMAL_DIG (FLT_DECIMAL_DIG) +#else + #define CGLTF_DECIMAL_DIG 9 +#endif + +#define CGLTF_SPRINTF(...) 
{ \ + assert(context->cursor || (!context->cursor && context->remaining == 0)); \ + context->tmp = snprintf ( context->cursor, context->remaining, __VA_ARGS__ ); \ + context->chars_written += context->tmp; \ + if (context->cursor) { \ + context->cursor += context->tmp; \ + context->remaining -= context->tmp; \ + } } + +#define CGLTF_SNPRINTF(length, ...) { \ + assert(context->cursor || (!context->cursor && context->remaining == 0)); \ + context->tmp = snprintf ( context->cursor, CGLTF_MIN(length + 1, context->remaining), __VA_ARGS__ ); \ + context->chars_written += length; \ + if (context->cursor) { \ + context->cursor += length; \ + context->remaining -= length; \ + } } + +#define CGLTF_WRITE_IDXPROP(label, val, start) if (val) { \ + cgltf_write_indent(context); \ + CGLTF_SPRINTF("\"%s\": %d", label, (int) (val - start)); \ + context->needs_comma = 1; } + +#define CGLTF_WRITE_IDXARRPROP(label, dim, vals, start) if (vals) { \ + cgltf_write_indent(context); \ + CGLTF_SPRINTF("\"%s\": [", label); \ + for (int i = 0; i < (int)(dim); ++i) { \ + int idx = (int) (vals[i] - start); \ + if (i != 0) CGLTF_SPRINTF(","); \ + CGLTF_SPRINTF(" %d", idx); \ + } \ + CGLTF_SPRINTF(" ]"); \ + context->needs_comma = 1; } + +#define CGLTF_WRITE_TEXTURE_INFO(label, info) if (info.texture) { \ + cgltf_write_line(context, "\"" label "\": {"); \ + CGLTF_WRITE_IDXPROP("index", info.texture, context->data->textures); \ + cgltf_write_intprop(context, "texCoord", info.texcoord, 0); \ + if (info.has_transform) { \ + context->extension_flags |= CGLTF_EXTENSION_FLAG_TEXTURE_TRANSFORM; \ + cgltf_write_texture_transform(context, &info.transform); \ + } \ + cgltf_write_extras(context, &info.extras); \ + cgltf_write_line(context, "}"); } + +#define CGLTF_WRITE_NORMAL_TEXTURE_INFO(label, info) if (info.texture) { \ + cgltf_write_line(context, "\"" label "\": {"); \ + CGLTF_WRITE_IDXPROP("index", info.texture, context->data->textures); \ + cgltf_write_intprop(context, "texCoord", info.texcoord, 0); \ 
+ cgltf_write_floatprop(context, "scale", info.scale, 1.0f); \ + if (info.has_transform) { \ + context->extension_flags |= CGLTF_EXTENSION_FLAG_TEXTURE_TRANSFORM; \ + cgltf_write_texture_transform(context, &info.transform); \ + } \ + cgltf_write_extras(context, &info.extras); \ + cgltf_write_line(context, "}"); } + +#define CGLTF_WRITE_OCCLUSION_TEXTURE_INFO(label, info) if (info.texture) { \ + cgltf_write_line(context, "\"" label "\": {"); \ + CGLTF_WRITE_IDXPROP("index", info.texture, context->data->textures); \ + cgltf_write_intprop(context, "texCoord", info.texcoord, 0); \ + cgltf_write_floatprop(context, "strength", info.scale, 1.0f); \ + if (info.has_transform) { \ + context->extension_flags |= CGLTF_EXTENSION_FLAG_TEXTURE_TRANSFORM; \ + cgltf_write_texture_transform(context, &info.transform); \ + } \ + cgltf_write_extras(context, &info.extras); \ + cgltf_write_line(context, "}"); } + +#ifndef CGLTF_CONSTS +static const cgltf_size GlbHeaderSize = 12; +static const cgltf_size GlbChunkHeaderSize = 8; +static const uint32_t GlbVersion = 2; +static const uint32_t GlbMagic = 0x46546C67; +static const uint32_t GlbMagicJsonChunk = 0x4E4F534A; +static const uint32_t GlbMagicBinChunk = 0x004E4942; +#define CGLTF_CONSTS +#endif + +static void cgltf_write_indent(cgltf_write_context* context) +{ + if (context->needs_comma) + { + CGLTF_SPRINTF(",\n"); + context->needs_comma = 0; + } + else + { + CGLTF_SPRINTF("\n"); + } + for (int i = 0; i < context->depth; ++i) + { + CGLTF_SPRINTF("%s", context->indent); + } +} + +static void cgltf_write_line(cgltf_write_context* context, const char* line) +{ + if (line[0] == ']' || line[0] == '}') + { + --context->depth; + context->needs_comma = 0; + } + cgltf_write_indent(context); + CGLTF_SPRINTF("%s", line); + cgltf_size last = (cgltf_size)(strlen(line) - 1); + if (line[0] == ']' || line[0] == '}') + { + context->needs_comma = 1; + } + if (line[last] == '[' || line[last] == '{') + { + ++context->depth; + context->needs_comma = 0; + } 
+} + +static void cgltf_write_strprop(cgltf_write_context* context, const char* label, const char* val) +{ + if (val) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": \"%s\"", label, val); + context->needs_comma = 1; + } +} + +static void cgltf_write_extras(cgltf_write_context* context, const cgltf_extras* extras) +{ + if (extras->data) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"extras\": %s", extras->data); + context->needs_comma = 1; + } + else + { + cgltf_size length = extras->end_offset - extras->start_offset; + if (length > 0 && context->data->json) + { + char* json_string = ((char*) context->data->json) + extras->start_offset; + cgltf_write_indent(context); + CGLTF_SPRINTF("%s", "\"extras\": "); + CGLTF_SNPRINTF(length, "%.*s", (int)(extras->end_offset - extras->start_offset), json_string); + context->needs_comma = 1; + } + } +} + +static void cgltf_write_stritem(cgltf_write_context* context, const char* item) +{ + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\"", item); + context->needs_comma = 1; +} + +static void cgltf_write_intprop(cgltf_write_context* context, const char* label, int val, int def) +{ + if (val != def) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": %d", label, val); + context->needs_comma = 1; + } +} + +static void cgltf_write_sizeprop(cgltf_write_context* context, const char* label, cgltf_size val, cgltf_size def) +{ + if (val != def) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": %zu", label, val); + context->needs_comma = 1; + } +} + +static void cgltf_write_floatprop(cgltf_write_context* context, const char* label, float val, float def) +{ + if (val != def) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": ", label); + CGLTF_SPRINTF("%.*g", CGLTF_DECIMAL_DIG, val); + context->needs_comma = 1; + + if (context->cursor) + { + char *decimal_comma = strchr(context->cursor - context->tmp, ','); + if (decimal_comma) + { + *decimal_comma = '.'; + } + } + } +} + +static void 
cgltf_write_boolprop_optional(cgltf_write_context* context, const char* label, bool val, bool def) +{ + if (val != def) + { + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": %s", label, val ? "true" : "false"); + context->needs_comma = 1; + } +} + +static void cgltf_write_floatarrayprop(cgltf_write_context* context, const char* label, const cgltf_float* vals, cgltf_size dim) +{ + cgltf_write_indent(context); + CGLTF_SPRINTF("\"%s\": [", label); + for (cgltf_size i = 0; i < dim; ++i) + { + if (i != 0) + { + CGLTF_SPRINTF(", %.*g", CGLTF_DECIMAL_DIG, vals[i]); + } + else + { + CGLTF_SPRINTF("%.*g", CGLTF_DECIMAL_DIG, vals[i]); + } + } + CGLTF_SPRINTF("]"); + context->needs_comma = 1; +} + +static bool cgltf_check_floatarray(const float* vals, int dim, float val) { + while (dim--) + { + if (vals[dim] != val) + { + return true; + } + } + return false; +} + +static int cgltf_int_from_component_type(cgltf_component_type ctype) +{ + switch (ctype) + { + case cgltf_component_type_r_8: return 5120; + case cgltf_component_type_r_8u: return 5121; + case cgltf_component_type_r_16: return 5122; + case cgltf_component_type_r_16u: return 5123; + case cgltf_component_type_r_32u: return 5125; + case cgltf_component_type_r_32f: return 5126; + default: return 0; + } +} + +static const char* cgltf_str_from_alpha_mode(cgltf_alpha_mode alpha_mode) +{ + switch (alpha_mode) + { + case cgltf_alpha_mode_mask: return "MASK"; + case cgltf_alpha_mode_blend: return "BLEND"; + default: return NULL; + } +} + +static const char* cgltf_str_from_type(cgltf_type type) +{ + switch (type) + { + case cgltf_type_scalar: return "SCALAR"; + case cgltf_type_vec2: return "VEC2"; + case cgltf_type_vec3: return "VEC3"; + case cgltf_type_vec4: return "VEC4"; + case cgltf_type_mat2: return "MAT2"; + case cgltf_type_mat3: return "MAT3"; + case cgltf_type_mat4: return "MAT4"; + default: return NULL; + } +} + +static cgltf_size cgltf_dim_from_type(cgltf_type type) +{ + switch (type) + { + case 
cgltf_type_scalar: return 1; + case cgltf_type_vec2: return 2; + case cgltf_type_vec3: return 3; + case cgltf_type_vec4: return 4; + case cgltf_type_mat2: return 4; + case cgltf_type_mat3: return 9; + case cgltf_type_mat4: return 16; + default: return 0; + } +} + +static const char* cgltf_str_from_camera_type(cgltf_camera_type camera_type) +{ + switch (camera_type) + { + case cgltf_camera_type_perspective: return "perspective"; + case cgltf_camera_type_orthographic: return "orthographic"; + default: return NULL; + } +} + +static const char* cgltf_str_from_light_type(cgltf_light_type light_type) +{ + switch (light_type) + { + case cgltf_light_type_directional: return "directional"; + case cgltf_light_type_point: return "point"; + case cgltf_light_type_spot: return "spot"; + default: return NULL; + } +} + +static void cgltf_write_texture_transform(cgltf_write_context* context, const cgltf_texture_transform* transform) +{ + cgltf_write_line(context, "\"extensions\": {"); + cgltf_write_line(context, "\"KHR_texture_transform\": {"); + if (cgltf_check_floatarray(transform->offset, 2, 0.0f)) + { + cgltf_write_floatarrayprop(context, "offset", transform->offset, 2); + } + cgltf_write_floatprop(context, "rotation", transform->rotation, 0.0f); + if (cgltf_check_floatarray(transform->scale, 2, 1.0f)) + { + cgltf_write_floatarrayprop(context, "scale", transform->scale, 2); + } + if (transform->has_texcoord) + { + cgltf_write_intprop(context, "texCoord", transform->texcoord, -1); + } + cgltf_write_line(context, "}"); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_asset(cgltf_write_context* context, const cgltf_asset* asset) +{ + cgltf_write_line(context, "\"asset\": {"); + cgltf_write_strprop(context, "copyright", asset->copyright); + cgltf_write_strprop(context, "generator", asset->generator); + cgltf_write_strprop(context, "version", asset->version); + cgltf_write_strprop(context, "min_version", asset->min_version); + cgltf_write_extras(context, 
&asset->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_primitive(cgltf_write_context* context, const cgltf_primitive* prim) +{ + cgltf_write_intprop(context, "mode", (int) prim->type, 4); + CGLTF_WRITE_IDXPROP("indices", prim->indices, context->data->accessors); + CGLTF_WRITE_IDXPROP("material", prim->material, context->data->materials); + cgltf_write_line(context, "\"attributes\": {"); + for (cgltf_size i = 0; i < prim->attributes_count; ++i) + { + const cgltf_attribute* attr = prim->attributes + i; + CGLTF_WRITE_IDXPROP(attr->name, attr->data, context->data->accessors); + } + cgltf_write_line(context, "}"); + + if (prim->targets_count) + { + cgltf_write_line(context, "\"targets\": ["); + for (cgltf_size i = 0; i < prim->targets_count; ++i) + { + cgltf_write_line(context, "{"); + for (cgltf_size j = 0; j < prim->targets[i].attributes_count; ++j) + { + const cgltf_attribute* attr = prim->targets[i].attributes + j; + CGLTF_WRITE_IDXPROP(attr->name, attr->data, context->data->accessors); + } + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "]"); + } + cgltf_write_extras(context, &prim->extras); + + if (prim->has_draco_mesh_compression || prim->mappings_count > 0) + { + cgltf_write_line(context, "\"extensions\": {"); + + if (prim->has_draco_mesh_compression) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_DRACO_MESH_COMPRESSION; + if (prim->attributes_count == 0 || prim->indices == 0) + { + context->required_extension_flags |= CGLTF_EXTENSION_FLAG_DRACO_MESH_COMPRESSION; + } + + cgltf_write_line(context, "\"KHR_draco_mesh_compression\": {"); + CGLTF_WRITE_IDXPROP("bufferView", prim->draco_mesh_compression.buffer_view, context->data->buffer_views); + cgltf_write_line(context, "\"attributes\": {"); + for (cgltf_size i = 0; i < prim->draco_mesh_compression.attributes_count; ++i) + { + const cgltf_attribute* attr = prim->draco_mesh_compression.attributes + i; + CGLTF_WRITE_IDXPROP(attr->name, attr->data, 
context->data->accessors); + } + cgltf_write_line(context, "}"); + cgltf_write_line(context, "}"); + } + + if (prim->mappings_count > 0) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_VARIANTS; + cgltf_write_line(context, "\"KHR_materials_variants\": {"); + cgltf_write_line(context, "\"mappings\": ["); + for (cgltf_size i = 0; i < prim->mappings_count; ++i) + { + const cgltf_material_mapping* map = prim->mappings + i; + cgltf_write_line(context, "{"); + CGLTF_WRITE_IDXPROP("material", map->material, context->data->materials); + + cgltf_write_indent(context); + CGLTF_SPRINTF("\"variants\": [%d]", (int)map->variant); + context->needs_comma = 1; + + cgltf_write_extras(context, &map->extras); + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "]"); + cgltf_write_line(context, "}"); + } + + cgltf_write_line(context, "}"); + } +} + +static void cgltf_write_mesh(cgltf_write_context* context, const cgltf_mesh* mesh) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", mesh->name); + + cgltf_write_line(context, "\"primitives\": ["); + for (cgltf_size i = 0; i < mesh->primitives_count; ++i) + { + cgltf_write_line(context, "{"); + cgltf_write_primitive(context, mesh->primitives + i); + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "]"); + + if (mesh->weights_count > 0) + { + cgltf_write_floatarrayprop(context, "weights", mesh->weights, mesh->weights_count); + } + + cgltf_write_extras(context, &mesh->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_buffer_view(cgltf_write_context* context, const cgltf_buffer_view* view) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", view->name); + CGLTF_WRITE_IDXPROP("buffer", view->buffer, context->data->buffers); + cgltf_write_sizeprop(context, "byteLength", view->size, (cgltf_size)-1); + cgltf_write_sizeprop(context, "byteOffset", view->offset, 0); + cgltf_write_sizeprop(context, "byteStride", view->stride, 0); + // 
NOTE: We skip writing "target" because the spec says its usage can be inferred. + cgltf_write_extras(context, &view->extras); + cgltf_write_line(context, "}"); +} + + +static void cgltf_write_buffer(cgltf_write_context* context, const cgltf_buffer* buffer) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", buffer->name); + cgltf_write_strprop(context, "uri", buffer->uri); + cgltf_write_sizeprop(context, "byteLength", buffer->size, (cgltf_size)-1); + cgltf_write_extras(context, &buffer->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_material(cgltf_write_context* context, const cgltf_material* material) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", material->name); + if (material->alpha_mode == cgltf_alpha_mode_mask) + { + cgltf_write_floatprop(context, "alphaCutoff", material->alpha_cutoff, 0.5f); + } + cgltf_write_boolprop_optional(context, "doubleSided", (bool)material->double_sided, false); + // cgltf_write_boolprop_optional(context, "unlit", material->unlit, false); + + if (material->unlit) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_UNLIT; + } + + if (material->has_pbr_specular_glossiness) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_SPECULAR_GLOSSINESS; + } + + if (material->has_clearcoat) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_CLEARCOAT; + } + + if (material->has_transmission) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_TRANSMISSION; + } + + if (material->has_volume) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_VOLUME; + } + + if (material->has_ior) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_IOR; + } + + if (material->has_specular) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_SPECULAR; + } + + if (material->has_sheen) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_SHEEN; + } + + if (material->has_emissive_strength) + { + 
context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_EMISSIVE_STRENGTH; + } + + if (material->has_iridescence) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_IRIDESCENCE; + } + + if (material->has_anisotropy) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_ANISOTROPY; + } + + if (material->has_pbr_metallic_roughness) + { + const cgltf_pbr_metallic_roughness* params = &material->pbr_metallic_roughness; + cgltf_write_line(context, "\"pbrMetallicRoughness\": {"); + CGLTF_WRITE_TEXTURE_INFO("baseColorTexture", params->base_color_texture); + CGLTF_WRITE_TEXTURE_INFO("metallicRoughnessTexture", params->metallic_roughness_texture); + cgltf_write_floatprop(context, "metallicFactor", params->metallic_factor, 1.0f); + cgltf_write_floatprop(context, "roughnessFactor", params->roughness_factor, 1.0f); + if (cgltf_check_floatarray(params->base_color_factor, 4, 1.0f)) + { + cgltf_write_floatarrayprop(context, "baseColorFactor", params->base_color_factor, 4); + } + cgltf_write_line(context, "}"); + } + + if (material->unlit || material->has_pbr_specular_glossiness || material->has_clearcoat || material->has_ior || material->has_specular || material->has_transmission || material->has_sheen || material->has_volume || material->has_emissive_strength || material->has_iridescence || material->has_anisotropy) + { + cgltf_write_line(context, "\"extensions\": {"); + if (material->has_clearcoat) + { + const cgltf_clearcoat* params = &material->clearcoat; + cgltf_write_line(context, "\"KHR_materials_clearcoat\": {"); + CGLTF_WRITE_TEXTURE_INFO("clearcoatTexture", params->clearcoat_texture); + CGLTF_WRITE_TEXTURE_INFO("clearcoatRoughnessTexture", params->clearcoat_roughness_texture); + CGLTF_WRITE_NORMAL_TEXTURE_INFO("clearcoatNormalTexture", params->clearcoat_normal_texture); + cgltf_write_floatprop(context, "clearcoatFactor", params->clearcoat_factor, 0.0f); + cgltf_write_floatprop(context, "clearcoatRoughnessFactor", 
params->clearcoat_roughness_factor, 0.0f); + cgltf_write_line(context, "}"); + } + if (material->has_ior) + { + const cgltf_ior* params = &material->ior; + cgltf_write_line(context, "\"KHR_materials_ior\": {"); + cgltf_write_floatprop(context, "ior", params->ior, 1.5f); + cgltf_write_line(context, "}"); + } + if (material->has_specular) + { + const cgltf_specular* params = &material->specular; + cgltf_write_line(context, "\"KHR_materials_specular\": {"); + CGLTF_WRITE_TEXTURE_INFO("specularTexture", params->specular_texture); + CGLTF_WRITE_TEXTURE_INFO("specularColorTexture", params->specular_color_texture); + cgltf_write_floatprop(context, "specularFactor", params->specular_factor, 1.0f); + if (cgltf_check_floatarray(params->specular_color_factor, 3, 1.0f)) + { + cgltf_write_floatarrayprop(context, "specularColorFactor", params->specular_color_factor, 3); + } + cgltf_write_line(context, "}"); + } + if (material->has_transmission) + { + const cgltf_transmission* params = &material->transmission; + cgltf_write_line(context, "\"KHR_materials_transmission\": {"); + CGLTF_WRITE_TEXTURE_INFO("transmissionTexture", params->transmission_texture); + cgltf_write_floatprop(context, "transmissionFactor", params->transmission_factor, 0.0f); + cgltf_write_line(context, "}"); + } + if (material->has_volume) + { + const cgltf_volume* params = &material->volume; + cgltf_write_line(context, "\"KHR_materials_volume\": {"); + CGLTF_WRITE_TEXTURE_INFO("thicknessTexture", params->thickness_texture); + cgltf_write_floatprop(context, "thicknessFactor", params->thickness_factor, 0.0f); + if (cgltf_check_floatarray(params->attenuation_color, 3, 1.0f)) + { + cgltf_write_floatarrayprop(context, "attenuationColor", params->attenuation_color, 3); + } + if (params->attenuation_distance < FLT_MAX) + { + cgltf_write_floatprop(context, "attenuationDistance", params->attenuation_distance, FLT_MAX); + } + cgltf_write_line(context, "}"); + } + if (material->has_sheen) + { + const cgltf_sheen* params 
= &material->sheen; + cgltf_write_line(context, "\"KHR_materials_sheen\": {"); + CGLTF_WRITE_TEXTURE_INFO("sheenColorTexture", params->sheen_color_texture); + CGLTF_WRITE_TEXTURE_INFO("sheenRoughnessTexture", params->sheen_roughness_texture); + if (cgltf_check_floatarray(params->sheen_color_factor, 3, 0.0f)) + { + cgltf_write_floatarrayprop(context, "sheenColorFactor", params->sheen_color_factor, 3); + } + cgltf_write_floatprop(context, "sheenRoughnessFactor", params->sheen_roughness_factor, 0.0f); + cgltf_write_line(context, "}"); + } + if (material->has_pbr_specular_glossiness) + { + const cgltf_pbr_specular_glossiness* params = &material->pbr_specular_glossiness; + cgltf_write_line(context, "\"KHR_materials_pbrSpecularGlossiness\": {"); + CGLTF_WRITE_TEXTURE_INFO("diffuseTexture", params->diffuse_texture); + CGLTF_WRITE_TEXTURE_INFO("specularGlossinessTexture", params->specular_glossiness_texture); + if (cgltf_check_floatarray(params->diffuse_factor, 4, 1.0f)) + { + cgltf_write_floatarrayprop(context, "diffuseFactor", params->diffuse_factor, 4); + } + if (cgltf_check_floatarray(params->specular_factor, 3, 1.0f)) + { + cgltf_write_floatarrayprop(context, "specularFactor", params->specular_factor, 3); + } + cgltf_write_floatprop(context, "glossinessFactor", params->glossiness_factor, 1.0f); + cgltf_write_line(context, "}"); + } + if (material->unlit) + { + cgltf_write_line(context, "\"KHR_materials_unlit\": {}"); + } + if (material->has_emissive_strength) + { + cgltf_write_line(context, "\"KHR_materials_emissive_strength\": {"); + const cgltf_emissive_strength* params = &material->emissive_strength; + cgltf_write_floatprop(context, "emissiveStrength", params->emissive_strength, 1.f); + cgltf_write_line(context, "}"); + } + if (material->has_iridescence) + { + cgltf_write_line(context, "\"KHR_materials_iridescence\": {"); + const cgltf_iridescence* params = &material->iridescence; + cgltf_write_floatprop(context, "iridescenceFactor", params->iridescence_factor, 
0.f); + CGLTF_WRITE_TEXTURE_INFO("iridescenceTexture", params->iridescence_texture); + cgltf_write_floatprop(context, "iridescenceIor", params->iridescence_ior, 1.3f); + cgltf_write_floatprop(context, "iridescenceThicknessMinimum", params->iridescence_thickness_min, 100.f); + cgltf_write_floatprop(context, "iridescenceThicknessMaximum", params->iridescence_thickness_max, 400.f); + CGLTF_WRITE_TEXTURE_INFO("iridescenceThicknessTexture", params->iridescence_thickness_texture); + cgltf_write_line(context, "}"); + } + if (material->has_anisotropy) + { + cgltf_write_line(context, "\"KHR_materials_anisotropy\": {"); + const cgltf_anisotropy* params = &material->anisotropy; + cgltf_write_floatprop(context, "anisotropyFactor", params->anisotropy_strength, 0.f); + cgltf_write_floatprop(context, "anisotropyRotation", params->anisotropy_rotation, 0.f); + CGLTF_WRITE_TEXTURE_INFO("anisotropyTexture", params->anisotropy_texture); + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "}"); + } + + CGLTF_WRITE_NORMAL_TEXTURE_INFO("normalTexture", material->normal_texture); + CGLTF_WRITE_OCCLUSION_TEXTURE_INFO("occlusionTexture", material->occlusion_texture); + CGLTF_WRITE_TEXTURE_INFO("emissiveTexture", material->emissive_texture); + if (cgltf_check_floatarray(material->emissive_factor, 3, 0.0f)) + { + cgltf_write_floatarrayprop(context, "emissiveFactor", material->emissive_factor, 3); + } + cgltf_write_strprop(context, "alphaMode", cgltf_str_from_alpha_mode(material->alpha_mode)); + cgltf_write_extras(context, &material->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_image(cgltf_write_context* context, const cgltf_image* image) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", image->name); + cgltf_write_strprop(context, "uri", image->uri); + CGLTF_WRITE_IDXPROP("bufferView", image->buffer_view, context->data->buffer_views); + cgltf_write_strprop(context, "mimeType", image->mime_type); + cgltf_write_extras(context, 
&image->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_texture(cgltf_write_context* context, const cgltf_texture* texture) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", texture->name); + CGLTF_WRITE_IDXPROP("source", texture->image, context->data->images); + CGLTF_WRITE_IDXPROP("sampler", texture->sampler, context->data->samplers); + + if (texture->has_basisu) + { + cgltf_write_line(context, "\"extensions\": {"); + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_TEXTURE_BASISU; + cgltf_write_line(context, "\"KHR_texture_basisu\": {"); + CGLTF_WRITE_IDXPROP("source", texture->basisu_image, context->data->images); + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "}"); + } + cgltf_write_extras(context, &texture->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_skin(cgltf_write_context* context, const cgltf_skin* skin) +{ + cgltf_write_line(context, "{"); + CGLTF_WRITE_IDXPROP("skeleton", skin->skeleton, context->data->nodes); + CGLTF_WRITE_IDXPROP("inverseBindMatrices", skin->inverse_bind_matrices, context->data->accessors); + CGLTF_WRITE_IDXARRPROP("joints", skin->joints_count, skin->joints, context->data->nodes); + cgltf_write_strprop(context, "name", skin->name); + cgltf_write_extras(context, &skin->extras); + cgltf_write_line(context, "}"); +} + +static const char* cgltf_write_str_path_type(cgltf_animation_path_type path_type) +{ + switch (path_type) + { + case cgltf_animation_path_type_translation: + return "translation"; + case cgltf_animation_path_type_rotation: + return "rotation"; + case cgltf_animation_path_type_scale: + return "scale"; + case cgltf_animation_path_type_weights: + return "weights"; + default: + break; + } + return "invalid"; +} + +static const char* cgltf_write_str_interpolation_type(cgltf_interpolation_type interpolation_type) +{ + switch (interpolation_type) + { + case cgltf_interpolation_type_linear: + return "LINEAR"; + case 
cgltf_interpolation_type_step: + return "STEP"; + case cgltf_interpolation_type_cubic_spline: + return "CUBICSPLINE"; + default: + break; + } + return "invalid"; +} + +static void cgltf_write_path_type(cgltf_write_context* context, const char *label, cgltf_animation_path_type path_type) +{ + cgltf_write_strprop(context, label, cgltf_write_str_path_type(path_type)); +} + +static void cgltf_write_interpolation_type(cgltf_write_context* context, const char *label, cgltf_interpolation_type interpolation_type) +{ + cgltf_write_strprop(context, label, cgltf_write_str_interpolation_type(interpolation_type)); +} + +static void cgltf_write_animation_sampler(cgltf_write_context* context, const cgltf_animation_sampler* animation_sampler) +{ + cgltf_write_line(context, "{"); + cgltf_write_interpolation_type(context, "interpolation", animation_sampler->interpolation); + CGLTF_WRITE_IDXPROP("input", animation_sampler->input, context->data->accessors); + CGLTF_WRITE_IDXPROP("output", animation_sampler->output, context->data->accessors); + cgltf_write_extras(context, &animation_sampler->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_animation_channel(cgltf_write_context* context, const cgltf_animation* animation, const cgltf_animation_channel* animation_channel) +{ + cgltf_write_line(context, "{"); + CGLTF_WRITE_IDXPROP("sampler", animation_channel->sampler, animation->samplers); + cgltf_write_line(context, "\"target\": {"); + CGLTF_WRITE_IDXPROP("node", animation_channel->target_node, context->data->nodes); + cgltf_write_path_type(context, "path", animation_channel->target_path); + cgltf_write_line(context, "}"); + cgltf_write_extras(context, &animation_channel->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_animation(cgltf_write_context* context, const cgltf_animation* animation) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", animation->name); + + if (animation->samplers_count > 0) + { + 
cgltf_write_line(context, "\"samplers\": ["); + for (cgltf_size i = 0; i < animation->samplers_count; ++i) + { + cgltf_write_animation_sampler(context, animation->samplers + i); + } + cgltf_write_line(context, "]"); + } + if (animation->channels_count > 0) + { + cgltf_write_line(context, "\"channels\": ["); + for (cgltf_size i = 0; i < animation->channels_count; ++i) + { + cgltf_write_animation_channel(context, animation, animation->channels + i); + } + cgltf_write_line(context, "]"); + } + cgltf_write_extras(context, &animation->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_sampler(cgltf_write_context* context, const cgltf_sampler* sampler) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", sampler->name); + cgltf_write_intprop(context, "magFilter", sampler->mag_filter, 0); + cgltf_write_intprop(context, "minFilter", sampler->min_filter, 0); + cgltf_write_intprop(context, "wrapS", sampler->wrap_s, 10497); + cgltf_write_intprop(context, "wrapT", sampler->wrap_t, 10497); + cgltf_write_extras(context, &sampler->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_node(cgltf_write_context* context, const cgltf_node* node) +{ + cgltf_write_line(context, "{"); + CGLTF_WRITE_IDXARRPROP("children", node->children_count, node->children, context->data->nodes); + CGLTF_WRITE_IDXPROP("mesh", node->mesh, context->data->meshes); + cgltf_write_strprop(context, "name", node->name); + if (node->has_matrix) + { + cgltf_write_floatarrayprop(context, "matrix", node->matrix, 16); + } + if (node->has_translation) + { + cgltf_write_floatarrayprop(context, "translation", node->translation, 3); + } + if (node->has_rotation) + { + cgltf_write_floatarrayprop(context, "rotation", node->rotation, 4); + } + if (node->has_scale) + { + cgltf_write_floatarrayprop(context, "scale", node->scale, 3); + } + if (node->skin) + { + CGLTF_WRITE_IDXPROP("skin", node->skin, context->data->skins); + } + + bool has_extension = 
node->light || (node->has_mesh_gpu_instancing && node->mesh_gpu_instancing.attributes_count > 0); + if(has_extension) + cgltf_write_line(context, "\"extensions\": {"); + + if (node->light) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_LIGHTS_PUNCTUAL; + cgltf_write_line(context, "\"KHR_lights_punctual\": {"); + CGLTF_WRITE_IDXPROP("light", node->light, context->data->lights); + cgltf_write_line(context, "}"); + } + + if (node->has_mesh_gpu_instancing && node->mesh_gpu_instancing.attributes_count > 0) + { + context->extension_flags |= CGLTF_EXTENSION_FLAG_MESH_GPU_INSTANCING; + context->required_extension_flags |= CGLTF_EXTENSION_FLAG_MESH_GPU_INSTANCING; + + cgltf_write_line(context, "\"EXT_mesh_gpu_instancing\": {"); + { + cgltf_write_line(context, "\"attributes\": {"); + { + for (cgltf_size i = 0; i < node->mesh_gpu_instancing.attributes_count; ++i) + { + const cgltf_attribute* attr = node->mesh_gpu_instancing.attributes + i; + CGLTF_WRITE_IDXPROP(attr->name, attr->data, context->data->accessors); + } + } + cgltf_write_line(context, "}"); + } + cgltf_write_line(context, "}"); + } + + if (has_extension) + cgltf_write_line(context, "}"); + + if (node->weights_count > 0) + { + cgltf_write_floatarrayprop(context, "weights", node->weights, node->weights_count); + } + + if (node->camera) + { + CGLTF_WRITE_IDXPROP("camera", node->camera, context->data->cameras); + } + + cgltf_write_extras(context, &node->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_scene(cgltf_write_context* context, const cgltf_scene* scene) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", scene->name); + CGLTF_WRITE_IDXARRPROP("nodes", scene->nodes_count, scene->nodes, context->data->nodes); + cgltf_write_extras(context, &scene->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_accessor(cgltf_write_context* context, const cgltf_accessor* accessor) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, 
"name", accessor->name); + CGLTF_WRITE_IDXPROP("bufferView", accessor->buffer_view, context->data->buffer_views); + cgltf_write_intprop(context, "componentType", cgltf_int_from_component_type(accessor->component_type), 0); + cgltf_write_strprop(context, "type", cgltf_str_from_type(accessor->type)); + cgltf_size dim = cgltf_dim_from_type(accessor->type); + cgltf_write_boolprop_optional(context, "normalized", (bool)accessor->normalized, false); + cgltf_write_sizeprop(context, "byteOffset", (int)accessor->offset, 0); + cgltf_write_intprop(context, "count", (int)accessor->count, -1); + if (accessor->has_min) + { + cgltf_write_floatarrayprop(context, "min", accessor->min, dim); + } + if (accessor->has_max) + { + cgltf_write_floatarrayprop(context, "max", accessor->max, dim); + } + if (accessor->is_sparse) + { + cgltf_write_line(context, "\"sparse\": {"); + cgltf_write_intprop(context, "count", (int)accessor->sparse.count, 0); + cgltf_write_line(context, "\"indices\": {"); + cgltf_write_sizeprop(context, "byteOffset", (int)accessor->sparse.indices_byte_offset, 0); + CGLTF_WRITE_IDXPROP("bufferView", accessor->sparse.indices_buffer_view, context->data->buffer_views); + cgltf_write_intprop(context, "componentType", cgltf_int_from_component_type(accessor->sparse.indices_component_type), 0); + cgltf_write_extras(context, &accessor->sparse.indices_extras); + cgltf_write_line(context, "}"); + cgltf_write_line(context, "\"values\": {"); + cgltf_write_sizeprop(context, "byteOffset", (int)accessor->sparse.values_byte_offset, 0); + CGLTF_WRITE_IDXPROP("bufferView", accessor->sparse.values_buffer_view, context->data->buffer_views); + cgltf_write_extras(context, &accessor->sparse.values_extras); + cgltf_write_line(context, "}"); + cgltf_write_extras(context, &accessor->sparse.extras); + cgltf_write_line(context, "}"); + } + cgltf_write_extras(context, &accessor->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_camera(cgltf_write_context* context, const 
cgltf_camera* camera) +{ + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "type", cgltf_str_from_camera_type(camera->type)); + if (camera->name) + { + cgltf_write_strprop(context, "name", camera->name); + } + + if (camera->type == cgltf_camera_type_orthographic) + { + cgltf_write_line(context, "\"orthographic\": {"); + cgltf_write_floatprop(context, "xmag", camera->data.orthographic.xmag, -1.0f); + cgltf_write_floatprop(context, "ymag", camera->data.orthographic.ymag, -1.0f); + cgltf_write_floatprop(context, "zfar", camera->data.orthographic.zfar, -1.0f); + cgltf_write_floatprop(context, "znear", camera->data.orthographic.znear, -1.0f); + cgltf_write_extras(context, &camera->data.orthographic.extras); + cgltf_write_line(context, "}"); + } + else if (camera->type == cgltf_camera_type_perspective) + { + cgltf_write_line(context, "\"perspective\": {"); + + if (camera->data.perspective.has_aspect_ratio) { + cgltf_write_floatprop(context, "aspectRatio", camera->data.perspective.aspect_ratio, -1.0f); + } + + cgltf_write_floatprop(context, "yfov", camera->data.perspective.yfov, -1.0f); + + if (camera->data.perspective.has_zfar) { + cgltf_write_floatprop(context, "zfar", camera->data.perspective.zfar, -1.0f); + } + + cgltf_write_floatprop(context, "znear", camera->data.perspective.znear, -1.0f); + cgltf_write_extras(context, &camera->data.perspective.extras); + cgltf_write_line(context, "}"); + } + cgltf_write_extras(context, &camera->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_light(cgltf_write_context* context, const cgltf_light* light) +{ + context->extension_flags |= CGLTF_EXTENSION_FLAG_LIGHTS_PUNCTUAL; + + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "type", cgltf_str_from_light_type(light->type)); + if (light->name) + { + cgltf_write_strprop(context, "name", light->name); + } + if (cgltf_check_floatarray(light->color, 3, 1.0f)) + { + cgltf_write_floatarrayprop(context, "color", light->color, 3); + } + 
cgltf_write_floatprop(context, "intensity", light->intensity, 1.0f); + cgltf_write_floatprop(context, "range", light->range, 0.0f); + + if (light->type == cgltf_light_type_spot) + { + cgltf_write_line(context, "\"spot\": {"); + cgltf_write_floatprop(context, "innerConeAngle", light->spot_inner_cone_angle, 0.0f); + cgltf_write_floatprop(context, "outerConeAngle", light->spot_outer_cone_angle, 3.14159265358979323846f/4.0f); + cgltf_write_line(context, "}"); + } + cgltf_write_extras( context, &light->extras ); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_variant(cgltf_write_context* context, const cgltf_material_variant* variant) +{ + context->extension_flags |= CGLTF_EXTENSION_FLAG_MATERIALS_VARIANTS; + + cgltf_write_line(context, "{"); + cgltf_write_strprop(context, "name", variant->name); + cgltf_write_extras(context, &variant->extras); + cgltf_write_line(context, "}"); +} + +static void cgltf_write_glb(FILE* file, const void* json_buf, const cgltf_size json_size, const void* bin_buf, const cgltf_size bin_size) +{ + char header[GlbHeaderSize]; + char chunk_header[GlbChunkHeaderSize]; + char json_pad[3] = { 0x20, 0x20, 0x20 }; + char bin_pad[3] = { 0, 0, 0 }; + + cgltf_size json_padsize = (json_size % 4 != 0) ? 4 - json_size % 4 : 0; + cgltf_size bin_padsize = (bin_size % 4 != 0) ? 
4 - bin_size % 4 : 0; + cgltf_size total_size = GlbHeaderSize + GlbChunkHeaderSize + json_size + json_padsize; + if (bin_buf != NULL && bin_size > 0) { + total_size += GlbChunkHeaderSize + bin_size + bin_padsize; + } + + // Write a GLB header + memcpy(header, &GlbMagic, 4); + memcpy(header + 4, &GlbVersion, 4); + memcpy(header + 8, &total_size, 4); + fwrite(header, 1, GlbHeaderSize, file); + + // Write a JSON chunk (header & data) + uint32_t json_chunk_size = (uint32_t)(json_size + json_padsize); + memcpy(chunk_header, &json_chunk_size, 4); + memcpy(chunk_header + 4, &GlbMagicJsonChunk, 4); + fwrite(chunk_header, 1, GlbChunkHeaderSize, file); + + fwrite(json_buf, 1, json_size, file); + fwrite(json_pad, 1, json_padsize, file); + + if (bin_buf != NULL && bin_size > 0) { + // Write a binary chunk (header & data) + uint32_t bin_chunk_size = (uint32_t)(bin_size + bin_padsize); + memcpy(chunk_header, &bin_chunk_size, 4); + memcpy(chunk_header + 4, &GlbMagicBinChunk, 4); + fwrite(chunk_header, 1, GlbChunkHeaderSize, file); + + fwrite(bin_buf, 1, bin_size, file); + fwrite(bin_pad, 1, bin_padsize, file); + } +} + +cgltf_result cgltf_write_file(const cgltf_options* options, const char* path, const cgltf_data* data) +{ + cgltf_size expected = cgltf_write(options, NULL, 0, data); + char* buffer = (char*) malloc(expected); + cgltf_size actual = cgltf_write(options, buffer, expected, data); + if (expected != actual) { + fprintf(stderr, "Error: expected %zu bytes but wrote %zu bytes.\n", expected, actual); + } + FILE* file = fopen(path, "wb"); + if (!file) + { + return cgltf_result_file_not_found; + } + // Note that cgltf_write() includes a null terminator, which we omit from the file content. + if (options->type == cgltf_file_type_glb) { + cgltf_write_glb(file, buffer, actual - 1, data->bin, data->bin_size); + } else { + // Write a plain JSON file. 
+ fwrite(buffer, actual - 1, 1, file); + } + fclose(file); + free(buffer); + return cgltf_result_success; +} + +static void cgltf_write_extensions(cgltf_write_context* context, uint32_t extension_flags) +{ + if (extension_flags & CGLTF_EXTENSION_FLAG_TEXTURE_TRANSFORM) { + cgltf_write_stritem(context, "KHR_texture_transform"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_UNLIT) { + cgltf_write_stritem(context, "KHR_materials_unlit"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_SPECULAR_GLOSSINESS) { + cgltf_write_stritem(context, "KHR_materials_pbrSpecularGlossiness"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_LIGHTS_PUNCTUAL) { + cgltf_write_stritem(context, "KHR_lights_punctual"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_DRACO_MESH_COMPRESSION) { + cgltf_write_stritem(context, "KHR_draco_mesh_compression"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_CLEARCOAT) { + cgltf_write_stritem(context, "KHR_materials_clearcoat"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_IOR) { + cgltf_write_stritem(context, "KHR_materials_ior"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_SPECULAR) { + cgltf_write_stritem(context, "KHR_materials_specular"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_TRANSMISSION) { + cgltf_write_stritem(context, "KHR_materials_transmission"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_SHEEN) { + cgltf_write_stritem(context, "KHR_materials_sheen"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_VARIANTS) { + cgltf_write_stritem(context, "KHR_materials_variants"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_VOLUME) { + cgltf_write_stritem(context, "KHR_materials_volume"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_TEXTURE_BASISU) { + cgltf_write_stritem(context, "KHR_texture_basisu"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_EMISSIVE_STRENGTH) { + cgltf_write_stritem(context, 
"KHR_materials_emissive_strength"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_IRIDESCENCE) { + cgltf_write_stritem(context, "KHR_materials_iridescence"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MATERIALS_ANISOTROPY) { + cgltf_write_stritem(context, "KHR_materials_anisotropy"); + } + if (extension_flags & CGLTF_EXTENSION_FLAG_MESH_GPU_INSTANCING) { + cgltf_write_stritem(context, "EXT_mesh_gpu_instancing"); + } +} + +cgltf_size cgltf_write(const cgltf_options* options, char* buffer, cgltf_size size, const cgltf_data* data) +{ + (void)options; + cgltf_write_context ctx; + ctx.buffer = buffer; + ctx.buffer_size = size; + ctx.remaining = size; + ctx.cursor = buffer; + ctx.chars_written = 0; + ctx.data = data; + ctx.depth = 1; + ctx.indent = " "; + ctx.needs_comma = 0; + ctx.extension_flags = 0; + ctx.required_extension_flags = 0; + + cgltf_write_context* context = &ctx; + + CGLTF_SPRINTF("{"); + + if (data->accessors_count > 0) + { + cgltf_write_line(context, "\"accessors\": ["); + for (cgltf_size i = 0; i < data->accessors_count; ++i) + { + cgltf_write_accessor(context, data->accessors + i); + } + cgltf_write_line(context, "]"); + } + + cgltf_write_asset(context, &data->asset); + + if (data->buffer_views_count > 0) + { + cgltf_write_line(context, "\"bufferViews\": ["); + for (cgltf_size i = 0; i < data->buffer_views_count; ++i) + { + cgltf_write_buffer_view(context, data->buffer_views + i); + } + cgltf_write_line(context, "]"); + } + + if (data->buffers_count > 0) + { + cgltf_write_line(context, "\"buffers\": ["); + for (cgltf_size i = 0; i < data->buffers_count; ++i) + { + cgltf_write_buffer(context, data->buffers + i); + } + cgltf_write_line(context, "]"); + } + + if (data->images_count > 0) + { + cgltf_write_line(context, "\"images\": ["); + for (cgltf_size i = 0; i < data->images_count; ++i) + { + cgltf_write_image(context, data->images + i); + } + cgltf_write_line(context, "]"); + } + + if (data->meshes_count > 0) + { + 
cgltf_write_line(context, "\"meshes\": ["); + for (cgltf_size i = 0; i < data->meshes_count; ++i) + { + cgltf_write_mesh(context, data->meshes + i); + } + cgltf_write_line(context, "]"); + } + + if (data->materials_count > 0) + { + cgltf_write_line(context, "\"materials\": ["); + for (cgltf_size i = 0; i < data->materials_count; ++i) + { + cgltf_write_material(context, data->materials + i); + } + cgltf_write_line(context, "]"); + } + + if (data->nodes_count > 0) + { + cgltf_write_line(context, "\"nodes\": ["); + for (cgltf_size i = 0; i < data->nodes_count; ++i) + { + cgltf_write_node(context, data->nodes + i); + } + cgltf_write_line(context, "]"); + } + + if (data->samplers_count > 0) + { + cgltf_write_line(context, "\"samplers\": ["); + for (cgltf_size i = 0; i < data->samplers_count; ++i) + { + cgltf_write_sampler(context, data->samplers + i); + } + cgltf_write_line(context, "]"); + } + + CGLTF_WRITE_IDXPROP("scene", data->scene, data->scenes); + + if (data->scenes_count > 0) + { + cgltf_write_line(context, "\"scenes\": ["); + for (cgltf_size i = 0; i < data->scenes_count; ++i) + { + cgltf_write_scene(context, data->scenes + i); + } + cgltf_write_line(context, "]"); + } + + if (data->textures_count > 0) + { + cgltf_write_line(context, "\"textures\": ["); + for (cgltf_size i = 0; i < data->textures_count; ++i) + { + cgltf_write_texture(context, data->textures + i); + } + cgltf_write_line(context, "]"); + } + + if (data->skins_count > 0) + { + cgltf_write_line(context, "\"skins\": ["); + for (cgltf_size i = 0; i < data->skins_count; ++i) + { + cgltf_write_skin(context, data->skins + i); + } + cgltf_write_line(context, "]"); + } + + if (data->animations_count > 0) + { + cgltf_write_line(context, "\"animations\": ["); + for (cgltf_size i = 0; i < data->animations_count; ++i) + { + cgltf_write_animation(context, data->animations + i); + } + cgltf_write_line(context, "]"); + } + + if (data->cameras_count > 0) + { + cgltf_write_line(context, "\"cameras\": ["); + for 
(cgltf_size i = 0; i < data->cameras_count; ++i) + { + cgltf_write_camera(context, data->cameras + i); + } + cgltf_write_line(context, "]"); + } + + if (data->lights_count > 0 || data->variants_count > 0) + { + cgltf_write_line(context, "\"extensions\": {"); + + if (data->lights_count > 0) + { + cgltf_write_line(context, "\"KHR_lights_punctual\": {"); + cgltf_write_line(context, "\"lights\": ["); + for (cgltf_size i = 0; i < data->lights_count; ++i) + { + cgltf_write_light(context, data->lights + i); + } + cgltf_write_line(context, "]"); + cgltf_write_line(context, "}"); + } + + if (data->variants_count) + { + cgltf_write_line(context, "\"KHR_materials_variants\": {"); + cgltf_write_line(context, "\"variants\": ["); + for (cgltf_size i = 0; i < data->variants_count; ++i) + { + cgltf_write_variant(context, data->variants + i); + } + cgltf_write_line(context, "]"); + cgltf_write_line(context, "}"); + } + + cgltf_write_line(context, "}"); + } + + if (context->extension_flags != 0) + { + cgltf_write_line(context, "\"extensionsUsed\": ["); + cgltf_write_extensions(context, context->extension_flags); + cgltf_write_line(context, "]"); + } + + if (context->required_extension_flags != 0) + { + cgltf_write_line(context, "\"extensionsRequired\": ["); + cgltf_write_extensions(context, context->required_extension_flags); + cgltf_write_line(context, "]"); + } + + cgltf_write_extras(context, &data->extras); + + CGLTF_SPRINTF("\n}\n"); + + // snprintf does not include the null terminator in its return value, so be sure to include it + // in the returned byte count. 
+ return 1 + ctx.chars_written; +} + +#endif /* #ifdef CGLTF_WRITE_IMPLEMENTATION */ + +/* cgltf is distributed under MIT license: + * + * Copyright (c) 2019-2021 Philip Rideout + + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ diff --git a/libkram/compressonator/bc6h/bc6h_decode.cpp b/libkram/compressonator/bc6h/bc6h_decode.cpp index f3d7be77..7bc4643a 100644 --- a/libkram/compressonator/bc6h/bc6h_decode.cpp +++ b/libkram/compressonator/bc6h/bc6h_decode.cpp @@ -210,6 +210,7 @@ int lerp(int a, int b, int i, int denom) { case 3: denom *= 5; i *= 5; // fall through to case 15 + [[fallthrough]]; case 15: weights = g_aWeights4; break; diff --git a/libkram/compressonator/bc6h/bc6h_encode.cpp b/libkram/compressonator/bc6h/bc6h_encode.cpp index 97fd41cd..b371e1de 100644 --- a/libkram/compressonator/bc6h/bc6h_encode.cpp +++ b/libkram/compressonator/bc6h/bc6h_encode.cpp @@ -473,7 +473,10 @@ void BC6HBlockEncoder::QuantizeEndPointToF16Prec(float EndPoints[MAX_SUBSETS][MA so that indices at fix up points have higher order bit set to 0 ==================================================================*/ -void BC6HBlockEncoder::SwapIndices(int iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int iIndices[3][BC6H_MAX_SUBSET_SIZE], int entryCount[BC6H_MAX_SUBSETS], int max_subsets, int mode, int shape_pattern) { +void BC6HBlockEncoder::SwapIndices(int iEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], int iIndices[MAX_SUBSETS][BC6H_MAX_SUBSET_SIZE], + // int entryCount[BC6H_MAX_SUBSETS], // this is 2 but callers pass array[MAX_SUBSETS] + int entryCount[MAX_SUBSETS], // to keep compiler happy + int max_subsets, int mode, int shape_pattern) { unsigned int uNumIndices = 1 << ModePartition[mode].IndexPrec; unsigned int uHighIndexBit = uNumIndices >> 1; @@ -594,7 +597,7 @@ bool BC6HBlockEncoder::TransformEndPoints(AMD_BC6H_Format &BC6H_data, int iEndPo void BC6HBlockEncoder::SaveCompressedBlockData( AMD_BC6H_Format &BC6H_data, int oEndPoints[MAX_SUBSETS][MAX_END_POINTS][MAX_DIMENSION_BIG], - int iIndices[2][MAX_SUBSET_SIZE], + int iIndices[MAX_SUBSETS][MAX_SUBSET_SIZE], // did harcode 2 = BC6H_MAX_SUBSET_SIZE, but not what is passed int max_subsets, int mode) { BC6H_data.m_mode = 
(unsigned short)mode; diff --git a/libkram/compressonator/bc6h/hdr_encode.cpp b/libkram/compressonator/bc6h/hdr_encode.cpp index 32b4090f..e2f76ba0 100644 --- a/libkram/compressonator/bc6h/hdr_encode.cpp +++ b/libkram/compressonator/bc6h/hdr_encode.cpp @@ -72,6 +72,7 @@ float lerpf(float a, float b, int i, int denom) { case 3: denom *= 5; i *= 5; // fall through to case 15 + [[fallthrough]]; case 7: weights = g_aWeights3; break; diff --git a/libkram/eastl/include/EASTL/chrono.h b/libkram/eastl/include/EASTL/chrono.h index ccfeb2f9..1d59a9b1 100644 --- a/libkram/eastl/include/EASTL/chrono.h +++ b/libkram/eastl/include/EASTL/chrono.h @@ -584,9 +584,42 @@ namespace chrono EA_RESTORE_VC_WARNING() return uint64_t(frequency * queryCounter()); #elif defined EA_PLATFORM_SONY - return sceKernelGetProcessTimeCounter(); + auto queryFrequency = [] + { + // nanoseconds/seconds / ticks/seconds + return double(1000000000.0L / (long double)sceKernelGetProcessTimeCounterFrequency()); // nanoseconds per tick + }; + + auto queryCounter = [] + { + return sceKernelGetProcessTimeCounter(); + }; + + EA_DISABLE_VC_WARNING(4640) // warning C4640: construction of local static object is not thread-safe (VS2013) + static auto frequency = queryFrequency(); // cache cpu frequency on first call + EA_RESTORE_VC_WARNING() + return uint64_t(frequency * (double)queryCounter()); #elif defined(EA_PLATFORM_APPLE) - return mach_absolute_time(); + // took this from newer from newer drop of EASTL from 2022 release on 11/8/24 + // Note that numer/denom will often be 1 and 1, so can skip math. + // but is 125/3 on some iOS and M1. Added inNanos check. Test. 
+ auto queryTimeInfo = [] + { + mach_timebase_info_data_t info; + mach_timebase_info(&info); + return info; + }; + + uint64_t t = mach_absolute_time(); + + static auto timeInfo = queryTimeInfo(); + static const bool isNanos = timeInfo.numer == 1 && timeInfo.denom == 1; + if (!isNanos) + { + t *= timeInfo.numer; + t /= timeInfo.denom; + } + return t; #elif defined(EA_PLATFORM_POSIX) // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) timespec ts; diff --git a/libkram/etc2comp/Etc.cpp b/libkram/etc2comp/Etc.cpp index 059e86c1..95151ddd 100644 --- a/libkram/etc2comp/Etc.cpp +++ b/libkram/etc2comp/Etc.cpp @@ -1,142 +1,142 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "EtcConfig.h" -#include "Etc.h" -#include "EtcFilter.h" - -#include - -namespace Etc -{ - // ---------------------------------------------------------------------------------------------------- - // C-style inteface to the encoder - // - void Encode(float *a_pafSourceRGBA, - unsigned int a_uiSourceWidth, - unsigned int a_uiSourceHeight, - Image::Format a_format, - ErrorMetric a_eErrMetric, - float a_fEffort, - unsigned int a_uiJobs, - unsigned int a_uiMaxJobs, - unsigned char **a_ppaucEncodingBits, - unsigned int *a_puiEncodingBitsBytes, - unsigned int *a_puiExtendedWidth, - unsigned int *a_puiExtendedHeight, - int *a_piEncodingTime_ms, bool a_bVerboseOutput) - { - - Image image(a_pafSourceRGBA, a_uiSourceWidth, - a_uiSourceHeight, - a_eErrMetric); - image.m_bVerboseOutput = a_bVerboseOutput; - image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); - - *a_ppaucEncodingBits = image.GetEncodingBits(); - *a_puiEncodingBitsBytes = image.GetEncodingBitsBytes(); - //*a_puiExtendedWidth = image.GetExtendedWidth(); - //*a_puiExtendedHeight = image.GetExtendedHeight(); - *a_piEncodingTime_ms = image.GetEncodingTimeMs(); - } - - void EncodeMipmaps(float *a_pafSourceRGBA, - unsigned int a_uiSourceWidth, - unsigned int a_uiSourceHeight, - Image::Format a_format, - ErrorMetric a_eErrMetric, - float a_fEffort, - unsigned int a_uiJobs, - unsigned int a_uiMaxJobs, - unsigned int a_uiMaxMipmaps, - unsigned int a_uiMipFilterFlags, - RawImage* a_pMipmapImages, - int *a_piEncodingTime_ms, - bool a_bVerboseOutput) - { - auto mipWidth = a_uiSourceWidth; - auto mipHeight = a_uiSourceHeight; - int totalEncodingTime = 0; - for(unsigned int mip = 0; mip < a_uiMaxMipmaps && mipWidth >= 1 && mipHeight >= 1; mip++) - { - float* pImageData = nullptr; - float* pMipImage = nullptr; - - if(mip == 0) - { - pImageData = a_pafSourceRGBA; - } - else - { - pMipImage = new float[mipWidth*mipHeight*4]; - if(FilterTwoPass(a_pafSourceRGBA, a_uiSourceWidth, 
a_uiSourceHeight, pMipImage, mipWidth, mipHeight, a_uiMipFilterFlags, Etc::FilterLanczos3) ) - { - pImageData = pMipImage; - } - } - - if ( pImageData ) - { - - Image image(pImageData, mipWidth, mipHeight, a_eErrMetric); - - image.m_bVerboseOutput = a_bVerboseOutput; - image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); - - a_pMipmapImages[mip].paucEncodingBits = std::shared_ptr(image.GetEncodingBits(), [](unsigned char *p) { delete[] p; }); - a_pMipmapImages[mip].uiEncodingBitsBytes = image.GetEncodingBitsBytes(); - //a_pMipmapImages[mip].uiExtendedWidth = image.GetExtendedWidth(); - //a_pMipmapImages[mip].uiExtendedHeight = image.GetExtendedHeight(); - - totalEncodingTime += image.GetEncodingTimeMs(); - } - - if(pMipImage) - { - delete[] pMipImage; - } - - if (!pImageData) - { - break; - } - - mipWidth >>= 1; - mipHeight >>= 1; - - // Get out of the loop if both shifted dimensions are zero - if ((mipWidth==0) && (mipHeight==0)) - { - break; - } - // Make sure to generate mipmap chain down to 1x1 for iOS - if (mipWidth==0) - { - mipWidth = 1; - } - if (mipHeight==0) { - mipHeight = 1; - } - } - - *a_piEncodingTime_ms = totalEncodingTime; - } - - - // ---------------------------------------------------------------------------------------------------- - // - -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "EtcConfig.h" +#include "Etc.h" +#include "EtcFilter.h" + +#include + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // C-style inteface to the encoder + // + void Encode(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned char **a_ppaucEncodingBits, + unsigned int *a_puiEncodingBitsBytes, + unsigned int *a_puiExtendedWidth, + unsigned int *a_puiExtendedHeight, + int *a_piEncodingTime_ms, bool a_bVerboseOutput) + { + + Image image(a_pafSourceRGBA, a_uiSourceWidth, + a_uiSourceHeight, + a_eErrMetric); + image.m_bVerboseOutput = a_bVerboseOutput; + image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); + + *a_ppaucEncodingBits = image.GetEncodingBits(); + *a_puiEncodingBitsBytes = image.GetEncodingBitsBytes(); + //*a_puiExtendedWidth = image.GetExtendedWidth(); + //*a_puiExtendedHeight = image.GetExtendedHeight(); + *a_piEncodingTime_ms = image.GetEncodingTimeMs(); + } + + void EncodeMipmaps(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned int a_uiMaxMipmaps, + unsigned int a_uiMipFilterFlags, + RawImage* a_pMipmapImages, + int *a_piEncodingTime_ms, + bool a_bVerboseOutput) + { + auto mipWidth = a_uiSourceWidth; + auto mipHeight = a_uiSourceHeight; + int totalEncodingTime = 0; + for(unsigned int mip = 0; mip < a_uiMaxMipmaps && mipWidth >= 1 && mipHeight >= 1; mip++) + { + float* pImageData = nullptr; + float* pMipImage = nullptr; + + if(mip == 0) + { + pImageData = a_pafSourceRGBA; + } + else + { + pMipImage = new float[mipWidth*mipHeight*4]; + if(FilterTwoPass(a_pafSourceRGBA, a_uiSourceWidth, 
a_uiSourceHeight, pMipImage, mipWidth, mipHeight, a_uiMipFilterFlags, Etc::FilterLanczos3) ) + { + pImageData = pMipImage; + } + } + + if ( pImageData ) + { + + Image image(pImageData, mipWidth, mipHeight, a_eErrMetric); + + image.m_bVerboseOutput = a_bVerboseOutput; + image.Encode(a_format, a_eErrMetric, a_fEffort, a_uiJobs, a_uiMaxJobs); + + a_pMipmapImages[mip].paucEncodingBits = std::shared_ptr(image.GetEncodingBits(), [](unsigned char *p) { delete[] p; }); + a_pMipmapImages[mip].uiEncodingBitsBytes = image.GetEncodingBitsBytes(); + //a_pMipmapImages[mip].uiExtendedWidth = image.GetExtendedWidth(); + //a_pMipmapImages[mip].uiExtendedHeight = image.GetExtendedHeight(); + + totalEncodingTime += image.GetEncodingTimeMs(); + } + + if(pMipImage) + { + delete[] pMipImage; + } + + if (!pImageData) + { + break; + } + + mipWidth >>= 1; + mipHeight >>= 1; + + // Get out of the loop if both shifted dimensions are zero + if ((mipWidth==0) && (mipHeight==0)) + { + break; + } + // Make sure to generate mipmap chain down to 1x1 for iOS + if (mipWidth==0) + { + mipWidth = 1; + } + if (mipHeight==0) { + mipHeight = 1; + } + } + + *a_piEncodingTime_ms = totalEncodingTime; + } + + + // ---------------------------------------------------------------------------------------------------- + // + +} diff --git a/libkram/etc2comp/Etc.h b/libkram/etc2comp/Etc.h index 90962efb..439388d6 100644 --- a/libkram/etc2comp/Etc.h +++ b/libkram/etc2comp/Etc.h @@ -1,71 +1,71 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "EtcConfig.h" -#include "EtcImage.h" -#include "EtcColor.h" -#include "EtcErrorMetric.h" -#include - -#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f) -#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) -#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f) - -namespace Etc -{ - class Block4x4EncodingBits; - - struct RawImage - { - int uiExtendedWidth; - int uiExtendedHeight; - unsigned int uiEncodingBitsBytes; - std::shared_ptr paucEncodingBits; - }; - - - - // C-style inteface to the encoder - void Encode(float *a_pafSourceRGBA, - unsigned int a_uiSourceWidth, - unsigned int a_uiSourceHeight, - Image::Format a_format, - ErrorMetric a_eErrMetric, - float a_fEffort, - unsigned int a_uiJobs, - unsigned int a_uimaxJobs, - unsigned char **a_ppaucEncodingBits, - unsigned int *a_puiEncodingBitsBytes, - unsigned int *a_puiExtendedWidth, - unsigned int *a_puiExtendedHeight, - int *a_piEncodingTime_ms, bool a_bVerboseOutput = false); - - void EncodeMipmaps(float *a_pafSourceRGBA, - unsigned int a_uiSourceWidth, - unsigned int a_uiSourceHeight, - Image::Format a_format, - ErrorMetric a_eErrMetric, - float a_fEffort, - unsigned int a_uiJobs, - unsigned int a_uiMaxJobs, - unsigned int a_uiMaxMipmaps, - unsigned int a_uiMipFilterFlags, - RawImage* a_pMipmaps, - int *a_piEncodingTime_ms, bool a_bVerboseOutput = false); - -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcConfig.h" +#include "EtcImage.h" +#include "EtcColor.h" +#include "EtcErrorMetric.h" +#include + +#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f) +#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) +#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f) + +namespace Etc +{ + class Block4x4EncodingBits; + + struct RawImage + { + int uiExtendedWidth; + int uiExtendedHeight; + unsigned int uiEncodingBitsBytes; + std::shared_ptr paucEncodingBits; + }; + + + + // C-style inteface to the encoder + void Encode(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uimaxJobs, + unsigned char **a_ppaucEncodingBits, + unsigned int *a_puiEncodingBitsBytes, + unsigned int *a_puiExtendedWidth, + unsigned int *a_puiExtendedHeight, + int *a_piEncodingTime_ms, bool a_bVerboseOutput = false); + + void EncodeMipmaps(float *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + Image::Format a_format, + ErrorMetric a_eErrMetric, + float a_fEffort, + unsigned int a_uiJobs, + unsigned int a_uiMaxJobs, + unsigned int a_uiMaxMipmaps, + unsigned int a_uiMipFilterFlags, + RawImage* a_pMipmaps, + int *a_piEncodingTime_ms, bool a_bVerboseOutput = false); + +} diff --git a/libkram/etc2comp/EtcBlock4x4.cpp b/libkram/etc2comp/EtcBlock4x4.cpp index ce753774..de71c663 100644 --- a/libkram/etc2comp/EtcBlock4x4.cpp +++ b/libkram/etc2comp/EtcBlock4x4.cpp @@ -1,317 +1,317 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4.cpp - -Implements the state associated with each 4x4 block of pixels in an image - -Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an -alpha of NAN - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4.h" - -//#include "EtcColor.h" -#include "EtcImage.h" -#include "EtcColorFloatRGBA.h" - -// only the rgb/a encoders use Block4x4 -#include "EtcBlock4x4EncodingBits.h" -#include "EtcBlock4x4Encoding_ETC1.h" -#include "EtcBlock4x4Encoding_RGB8.h" -#include "EtcBlock4x4Encoding_RGBA8.h" -#include "EtcBlock4x4Encoding_RGB8A1.h" - -#include -#include -#include - -namespace Etc -{ - // ETC pixels are scanned vertically. 
- // this mapping is for when someone wants to scan the ETC pixels horizontally - const uint8_t Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4::Block4x4(void) - { - Init(); - } - Block4x4::~Block4x4() - { - m_pimageSource = nullptr; - - if (m_pencoding) - { - delete m_pencoding; - m_pencoding = nullptr; - } - } - - void Block4x4::Init() { - m_pimageSource = nullptr; - m_pencoding = nullptr; - - m_uiSourceH = 0; - m_uiSourceV = 0; - - m_sourcealphamix = SourceAlphaMix::UNKNOWN; - //m_boolBorderPixels = false; - m_boolPunchThroughPixels = false; - m_hasColorPixels = true; - - //m_errormetric = ErrorMetric::NUMERIC; - } - - Block4x4Encoding* Block4x4::NewEncoderIfNeeded(Image::Format format) - { - Block4x4Encoding* p_encoding = m_pencoding; - if (!p_encoding) - { - switch(format) { - case Image::Format::RGB8: - case Image::Format::SRGB8: - p_encoding = new Block4x4Encoding_RGB8; - break; - - case Image::Format::RGBA8: - case Image::Format::SRGBA8: - p_encoding = new Block4x4Encoding_RGBA8; - break; - - // don't really care about using ETC1 or A1 - case Image::Format::ETC1: - p_encoding = new Block4x4Encoding_ETC1; - break; - - case Image::Format::RGB8A1: - case Image::Format::SRGB8A1: - p_encoding = new Block4x4Encoding_RGB8A1; - break; - - default: - assert(false); - break; - } - } - return p_encoding; - } - - void Block4x4::Encode(Image *a_pimageSource, - unsigned int a_uiSourceH, unsigned int a_uiSourceV, - unsigned char *a_paucEncodingBits) - { - // this is use the same encoding over and over, so don't delete existing - Etc::Block4x4Encoding* p_encoding = NewEncoderIfNeeded(a_pimageSource->GetFormat()); - ErrorMetric errorMetric = a_pimageSource->GetErrorMetric(); - - m_pencoding = nullptr; - Block4x4::Init(); - - m_pimageSource = a_pimageSource; - - m_uiSourceH = a_uiSourceH; - m_uiSourceV 
= a_uiSourceV; - //m_errormetric = errorMetric; - m_pencoding = p_encoding; - - SetSourcePixels(); - - m_pencoding->Encode(this, m_afrgbaSource, - a_paucEncodingBits, errorMetric); - - } - - - // ---------------------------------------------------------------------------------------------------- - // initialization of encoding state from a prior encoding using encoding bits - // [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource - // a_paucEncodingBits is the place to read the prior encoding - // a_imageformat is used to determine how to interpret a_paucEncodingBits - // a_errormetric was used for the prior encoding - // - void Block4x4::Decode( - unsigned int a_uiSourceH, unsigned int a_uiSourceV, - unsigned char *a_paucEncodingBits, - Image *a_pimageSource, - uint16_t iterationCount) - { - // this is use the same encoding over and over, so don't delete existing - Etc::Block4x4Encoding* p_encoding = NewEncoderIfNeeded(a_pimageSource->GetFormat()); - ErrorMetric errorMetric = a_pimageSource->GetErrorMetric(); - - //delete m_pencoding; - m_pencoding = nullptr; - Block4x4::Init(); - - m_pimageSource = a_pimageSource; - - m_uiSourceH = a_uiSourceH; - m_uiSourceV = a_uiSourceV; - //m_errormetric = errorMetric; - m_pencoding = p_encoding; - - if (m_pimageSource->HasSourcePixels()) { - SetSourcePixels(); - - m_pencoding->Decode(this, a_paucEncodingBits, m_afrgbaSource, errorMetric, iterationCount); - } - else { - // pure decode - m_pencoding->Decode(this, a_paucEncodingBits, nullptr, errorMetric, iterationCount); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set source pixels from m_pimageSource - // set m_alphamix - // - void Block4x4::SetSourcePixels(void) - { - // copy source to consecutive memory locations - // convert from image horizontal scan to block vertical scan - int uiPixel = 0; - for (int x = 0; x < 4; x++) - { - int uiSourcePixelH = m_uiSourceH + x; - - for 
(int y = 0; y < 4; y++) - { - int uiSourcePixelV = m_uiSourceV + y; - - ColorFloatRGBA pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV); - - ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; - sourcePixel = pfrgbaSource; - uiPixel++; - } - } - - //---------------------------------------- - - m_hasColorPixels = false; - for (uiPixel = 0; uiPixel < 16; ++uiPixel) - { - ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; - - // this is doing fp equality - if (sourcePixel.fR != sourcePixel.fG || sourcePixel.fR != sourcePixel.fB) - { - m_hasColorPixels = true; - break; - } - } - - //---------------------------------------- - - // alpha census - int uiTransparentSourcePixels = 0; - int uiOpaqueSourcePixels = 0; - - Image::Format imageformat = m_pimageSource->GetFormat(); - - for (uiPixel = 0; uiPixel < 16; ++uiPixel) - { - ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; - - // for formats with no alpha, set source alpha to 1 - if (imageformat == Image::Format::ETC1 || - imageformat == Image::Format::RGB8 || - imageformat == Image::Format::SRGB8) - { - sourcePixel.fA = 1.0f; - } - - // for RGB8A1, set source alpha to 0.0 or 1.0 - // set punch through flag - else if (imageformat == Image::Format::RGB8A1 || - imageformat == Image::Format::SRGB8A1) - { - if (sourcePixel.fA >= 0.5f) - { - sourcePixel.fA = 1.0f; - } - else - { - sourcePixel.fA = 0.0f; - m_boolPunchThroughPixels = true; - } - } - - if (sourcePixel.fA == 1.0f) - { - uiOpaqueSourcePixels++; - } - else if (sourcePixel.fA == 0.0f) - { - // TODO: an assumption here that R/G/B are 0, but with multichannel that's not the case - // A could be all 0, but rgb contain valid channel content - uiTransparentSourcePixels++; - } - } - - // This only applies for RGBA (premul weighted calcs) - if (uiOpaqueSourcePixels == PIXELS) - { - m_sourcealphamix = SourceAlphaMix::OPAQUE; - } - else if (uiTransparentSourcePixels == PIXELS) - { - // TODO: could check rgb for all 0, and then set 
TRANSPARENT - m_sourcealphamix = SourceAlphaMix::TRANSPARENT; - - // TODO: nothing setting ALL_ZERO_ALPHA. Could look at all rgb to identify that. - - //(m_pimageSource->GetErrorMetric() == ErrorMetric::NUMERIC || m_pimageSource->GetErrorMetric() == ErrorMetric::RGBX) ? SourceAlphaMix::ALL_ZERO_ALPHA : - // SourceAlphaMix::TRANSPARENT; - } - else - { - m_sourcealphamix = SourceAlphaMix::TRANSLUCENT; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // return a name for the encoding mode - // -// const char * Block4x4::GetEncodingModeName(void) -// { -// -// switch (m_pencoding->GetMode()) -// { -// case Block4x4Encoding::MODE_ETC1: -// return "ETC1"; -// case Block4x4Encoding::MODE_T: -// return "T"; -// case Block4x4Encoding::MODE_H: -// return "H"; -// case Block4x4Encoding::MODE_PLANAR: -// return "PLANAR"; -// default: -// return "???"; -// } -// } - - // ---------------------------------------------------------------------------------------------------- - // - -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcBlock4x4.cpp + +Implements the state associated with each 4x4 block of pixels in an image + +Source images that are not a multiple of 4x4 are extended to fill the Block4x4 using pixels with an +alpha of NAN + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4.h" + +//#include "EtcColor.h" +#include "EtcImage.h" +#include "EtcColorFloatRGBA.h" + +// only the rgb/a encoders use Block4x4 +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4Encoding_ETC1.h" +#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcBlock4x4Encoding_RGBA8.h" +#include "EtcBlock4x4Encoding_RGB8A1.h" + +#include +#include +#include + +namespace Etc +{ + // ETC pixels are scanned vertically. + // this mapping is for when someone wants to scan the ETC pixels horizontally + const uint8_t Block4x4::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4::Block4x4(void) + { + Init(); + } + Block4x4::~Block4x4() + { + m_pimageSource = nullptr; + + if (m_pencoding) + { + delete m_pencoding; + m_pencoding = nullptr; + } + } + + void Block4x4::Init() { + m_pimageSource = nullptr; + m_pencoding = nullptr; + + m_uiSourceH = 0; + m_uiSourceV = 0; + + m_sourcealphamix = SourceAlphaMix::UNKNOWN; + //m_boolBorderPixels = false; + m_boolPunchThroughPixels = false; + m_hasColorPixels = true; + + //m_errormetric = ErrorMetric::NUMERIC; + } + + Block4x4Encoding* Block4x4::NewEncoderIfNeeded(Image::Format format) + { + Block4x4Encoding* p_encoding = m_pencoding; + if (!p_encoding) + { + switch(format) { + case Image::Format::RGB8: + case Image::Format::SRGB8: + p_encoding = new Block4x4Encoding_RGB8; + break; + + case Image::Format::RGBA8: + case Image::Format::SRGBA8: + p_encoding = new Block4x4Encoding_RGBA8; + break; + + // don't really care about using ETC1 or A1 + case Image::Format::ETC1: + p_encoding = new Block4x4Encoding_ETC1; 
+ break; + + case Image::Format::RGB8A1: + case Image::Format::SRGB8A1: + p_encoding = new Block4x4Encoding_RGB8A1; + break; + + default: + assert(false); + break; + } + } + return p_encoding; + } + + void Block4x4::Encode(Image *a_pimageSource, + unsigned int a_uiSourceH, unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits) + { + // this is use the same encoding over and over, so don't delete existing + Etc::Block4x4Encoding* p_encoding = NewEncoderIfNeeded(a_pimageSource->GetFormat()); + ErrorMetric errorMetric = a_pimageSource->GetErrorMetric(); + + m_pencoding = nullptr; + Block4x4::Init(); + + m_pimageSource = a_pimageSource; + + m_uiSourceH = a_uiSourceH; + m_uiSourceV = a_uiSourceV; + //m_errormetric = errorMetric; + m_pencoding = p_encoding; + + SetSourcePixels(); + + m_pencoding->Encode(this, m_afrgbaSource, + a_paucEncodingBits, errorMetric); + + } + + + // ---------------------------------------------------------------------------------------------------- + // initialization of encoding state from a prior encoding using encoding bits + // [a_uiSourceH,a_uiSourceV] is the location of the block in a_pimageSource + // a_paucEncodingBits is the place to read the prior encoding + // a_imageformat is used to determine how to interpret a_paucEncodingBits + // a_errormetric was used for the prior encoding + // + void Block4x4::Decode( + unsigned int a_uiSourceH, unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + Image *a_pimageSource, + uint16_t iterationCount) + { + // this is use the same encoding over and over, so don't delete existing + Etc::Block4x4Encoding* p_encoding = NewEncoderIfNeeded(a_pimageSource->GetFormat()); + ErrorMetric errorMetric = a_pimageSource->GetErrorMetric(); + + //delete m_pencoding; + m_pencoding = nullptr; + Block4x4::Init(); + + m_pimageSource = a_pimageSource; + + m_uiSourceH = a_uiSourceH; + m_uiSourceV = a_uiSourceV; + //m_errormetric = errorMetric; + m_pencoding = p_encoding; + + if 
(m_pimageSource->HasSourcePixels()) { + SetSourcePixels(); + + m_pencoding->Decode(this, a_paucEncodingBits, m_afrgbaSource, errorMetric, iterationCount); + } + else { + // pure decode + m_pencoding->Decode(this, a_paucEncodingBits, nullptr, errorMetric, iterationCount); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set source pixels from m_pimageSource + // set m_alphamix + // + void Block4x4::SetSourcePixels(void) + { + // copy source to consecutive memory locations + // convert from image horizontal scan to block vertical scan + int uiPixel = 0; + for (int x = 0; x < 4; x++) + { + int uiSourcePixelH = m_uiSourceH + x; + + for (int y = 0; y < 4; y++) + { + int uiSourcePixelV = m_uiSourceV + y; + + ColorFloatRGBA pfrgbaSource = m_pimageSource->GetSourcePixel(uiSourcePixelH, uiSourcePixelV); + + ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; + sourcePixel = pfrgbaSource; + uiPixel++; + } + } + + //---------------------------------------- + + m_hasColorPixels = false; + for (uiPixel = 0; uiPixel < 16; ++uiPixel) + { + ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; + + // this is doing fp equality + if (sourcePixel.fR != sourcePixel.fG || sourcePixel.fR != sourcePixel.fB) + { + m_hasColorPixels = true; + break; + } + } + + //---------------------------------------- + + // alpha census + int uiTransparentSourcePixels = 0; + int uiOpaqueSourcePixels = 0; + + Image::Format imageformat = m_pimageSource->GetFormat(); + + for (uiPixel = 0; uiPixel < 16; ++uiPixel) + { + ColorFloatRGBA& sourcePixel = m_afrgbaSource[uiPixel]; + + // for formats with no alpha, set source alpha to 1 + if (imageformat == Image::Format::ETC1 || + imageformat == Image::Format::RGB8 || + imageformat == Image::Format::SRGB8) + { + sourcePixel.fA = 1.0f; + } + + // for RGB8A1, set source alpha to 0.0 or 1.0 + // set punch through flag + else if (imageformat == Image::Format::RGB8A1 || + imageformat == 
Image::Format::SRGB8A1) + { + if (sourcePixel.fA >= 0.5f) + { + sourcePixel.fA = 1.0f; + } + else + { + sourcePixel.fA = 0.0f; + m_boolPunchThroughPixels = true; + } + } + + if (sourcePixel.fA == 1.0f) + { + uiOpaqueSourcePixels++; + } + else if (sourcePixel.fA == 0.0f) + { + // TODO: an assumption here that R/G/B are 0, but with multichannel that's not the case + // A could be all 0, but rgb contain valid channel content + uiTransparentSourcePixels++; + } + } + + // This only applies for RGBA (premul weighted calcs) + if (uiOpaqueSourcePixels == PIXELS) + { + m_sourcealphamix = SourceAlphaMix::OPAQUE; + } + else if (uiTransparentSourcePixels == PIXELS) + { + // TODO: could check rgb for all 0, and then set TRANSPARENT + m_sourcealphamix = SourceAlphaMix::TRANSPARENT; + + // TODO: nothing setting ALL_ZERO_ALPHA. Could look at all rgb to identify that. + + //(m_pimageSource->GetErrorMetric() == ErrorMetric::NUMERIC || m_pimageSource->GetErrorMetric() == ErrorMetric::RGBX) ? SourceAlphaMix::ALL_ZERO_ALPHA : + // SourceAlphaMix::TRANSPARENT; + } + else + { + m_sourcealphamix = SourceAlphaMix::TRANSLUCENT; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // return a name for the encoding mode + // +// const char * Block4x4::GetEncodingModeName(void) +// { +// +// switch (m_pencoding->GetMode()) +// { +// case Block4x4Encoding::MODE_ETC1: +// return "ETC1"; +// case Block4x4Encoding::MODE_T: +// return "T"; +// case Block4x4Encoding::MODE_H: +// return "H"; +// case Block4x4Encoding::MODE_PLANAR: +// return "PLANAR"; +// default: +// return "???"; +// } +// } + + // ---------------------------------------------------------------------------------------------------- + // + +} diff --git a/libkram/etc2comp/EtcBlock4x4.h b/libkram/etc2comp/EtcBlock4x4.h index 3e869938..518a7ba7 100644 --- a/libkram/etc2comp/EtcBlock4x4.h +++ b/libkram/etc2comp/EtcBlock4x4.h @@ -1,132 +1,132 @@ -/* - * Copyright 2015 The 
Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -//#include "EtcColor.h" -#include "EtcColorFloatRGBA.h" -//#include "EtcErrorMetric.h" -#include "EtcImage.h" -#include "EtcBlock4x4Encoding.h" - -namespace Etc -{ - //class Block4x4Encoding; - - // This base holds a 4x4 block, and is only used for RGB/RGBA encodings - class Block4x4 - { - public: - - static const unsigned int ROWS = 4; - static const unsigned int COLUMNS = 4; - static const unsigned int PIXELS = ROWS * COLUMNS; - - // the alpha mix for a 4x4 block of pixels - enum class SourceAlphaMix - { - UNKNOWN, - // - OPAQUE, // all 1.0 - TRANSPARENT, // all channels 0.0 - TRANSLUCENT, // not all opaque or transparent - ALL_ZERO_ALPHA // used for multichannel where all A = 0, but rgb contain data - }; - - typedef void (Block4x4::*EncoderFunctionPtr)(void); - - Block4x4(void); - ~Block4x4(); - - // called on first init of a block with/without multipass - void Encode(Image *a_pimageSource, - unsigned int a_uiSourceH, - unsigned int a_uiSourceV, - unsigned char *a_paucEncodingBits - ); - - // used on subsequent passes with multipass to decode from block for subsequent encodes - void Decode(unsigned int a_uiSourceH, - unsigned int a_uiSourceV, - unsigned char *a_paucEncodingBits, - Image *a_pimageSource, - uint16_t iterationCount); - - inline Block4x4Encoding * GetEncoding(void) - { - return m_pencoding; - } - - //---------------------- - - inline unsigned int 
GetSourceH(void) const - { - return m_uiSourceH; - } - - inline unsigned int GetSourceV(void) const - { - return m_uiSourceV; - } - - inline const ColorFloatRGBA * GetSource() const - { - return m_afrgbaSource; - } - - inline SourceAlphaMix GetSourceAlphaMix(void) const - { - return m_sourcealphamix; // or return from m_pimageSource->GetSourceAlphaMix() - } - - inline const Image * GetImageSource(void) const - { - return m_pimageSource; - } - - inline bool HasPunchThroughPixels(void) const - { - return m_boolPunchThroughPixels; - } - - // gray vs. color - inline bool HasColorPixels(void) const - { - return m_hasColorPixels; - } - - private: - Block4x4Encoding* NewEncoderIfNeeded(Image::Format format); - void Init(); - - void SetSourcePixels(void); - - static const uint8_t s_auiPixelOrderHScan[PIXELS]; - - Image *m_pimageSource; - unsigned int m_uiSourceH; - unsigned int m_uiSourceV; - ColorFloatRGBA m_afrgbaSource[PIXELS]; // vertical scan (Not std. pixel order, it's stored transposed) - - SourceAlphaMix m_sourcealphamix; - bool m_boolPunchThroughPixels; // RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5 - bool m_hasColorPixels; - - Block4x4Encoding *m_pencoding; - - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +//#include "EtcColor.h" +#include "EtcColorFloatRGBA.h" +//#include "EtcErrorMetric.h" +#include "EtcImage.h" +#include "EtcBlock4x4Encoding.h" + +namespace Etc +{ + //class Block4x4Encoding; + + // This base holds a 4x4 block, and is only used for RGB/RGBA encodings + class Block4x4 + { + public: + + static const unsigned int ROWS = 4; + static const unsigned int COLUMNS = 4; + static const unsigned int PIXELS = ROWS * COLUMNS; + + // the alpha mix for a 4x4 block of pixels + enum class SourceAlphaMix + { + UNKNOWN, + // + OPAQUE, // all 1.0 + TRANSPARENT, // all channels 0.0 + TRANSLUCENT, // not all opaque or transparent + ALL_ZERO_ALPHA // used for multichannel where all A = 0, but rgb contain data + }; + + typedef void (Block4x4::*EncoderFunctionPtr)(void); + + Block4x4(void); + ~Block4x4(); + + // called on first init of a block with/without multipass + void Encode(Image *a_pimageSource, + unsigned int a_uiSourceH, + unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits + ); + + // used on subsequent passes with multipass to decode from block for subsequent encodes + void Decode(unsigned int a_uiSourceH, + unsigned int a_uiSourceV, + unsigned char *a_paucEncodingBits, + Image *a_pimageSource, + uint16_t iterationCount); + + inline Block4x4Encoding * GetEncoding(void) + { + return m_pencoding; + } + + //---------------------- + + inline unsigned int GetSourceH(void) const + { + return m_uiSourceH; + } + + inline unsigned int GetSourceV(void) const + { + return m_uiSourceV; + } + + inline const ColorFloatRGBA * GetSource() const + { + return m_afrgbaSource; + } + + inline SourceAlphaMix GetSourceAlphaMix(void) const + { + return m_sourcealphamix; // or return from m_pimageSource->GetSourceAlphaMix() + } + + inline const Image * GetImageSource(void) const + { + return m_pimageSource; + } + + inline bool HasPunchThroughPixels(void) const + { + return m_boolPunchThroughPixels; + } + + // gray vs. 
color + inline bool HasColorPixels(void) const + { + return m_hasColorPixels; + } + + private: + Block4x4Encoding* NewEncoderIfNeeded(Image::Format format); + void Init(); + + void SetSourcePixels(void); + + static const uint8_t s_auiPixelOrderHScan[PIXELS]; + + Image *m_pimageSource; + unsigned int m_uiSourceH; + unsigned int m_uiSourceV; + ColorFloatRGBA m_afrgbaSource[PIXELS]; // vertical scan (Not std. pixel order, it's stored transposed) + + SourceAlphaMix m_sourcealphamix; + bool m_boolPunchThroughPixels; // RGB8A1 or SRGB8A1 with any pixels with alpha < 0.5 + bool m_hasColorPixels; + + Block4x4Encoding *m_pencoding; + + }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding.cpp b/libkram/etc2comp/EtcBlock4x4Encoding.cpp index fcbf5ee9..2a0068b6 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding.cpp @@ -1,124 +1,124 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding.cpp - -Block4x4Encoding is the abstract base class for the different encoders. Each encoder targets a -particular file format (e.g. 
ETC1, RGB8, RGBA8, R11) - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding.h" - -#include "EtcBlock4x4EncodingBits.h" -#include "EtcBlock4x4.h" - -#include -#include -#include - -namespace Etc -{ - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding::Block4x4Encoding(void) - { - Init(); - } - - void Block4x4Encoding::Init() - { - m_pblockParent = nullptr; - - m_pafrgbaSource = nullptr; - - m_fError = 0.0f; - - m_mode = MODE_UNKNOWN; - - m_uiEncodingIterations = 0; - m_boolDone = false; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, 1.0f); - } - } - - // ---------------------------------------------------------------------------------------------------- - // initialize the generic encoding for a 4x4 block - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // init the decoded pixels to -1 to mark them as undefined - // init the error to -1 to mark it as undefined - // - void Block4x4Encoding::Init(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) - { - Init(); - - m_pblockParent = a_pblockParent; - m_pafrgbaSource = a_pafrgbaSource; - m_errormetric = a_errormetric; - - m_uiEncodingIterations = iterationCount; - } - - // ---------------------------------------------------------------------------------------------------- - - void Block4x4Encoding::SetDoneIfPerfect() - { - float kErrorTolerance = 0.0f; - - // instead of comparing to 0 which is almost never achieved in float, - // use a normalized 8-bit tolerance. See A8 and R11 code for kErrorTolerance. 
-#define ADD_TOLERANCE 1 -#if ADD_TOLERANCE - // 16 pixels accumulated, all within 1/255 of final value, and then weights - static const float kErrorToleranceRec709 = (1.0f / 255.0f) * (1.0f / 255.0f) * 5.0f * 16.0f; - static const float kErrorToleranceNumeric = (1.0f / 255.0f) * (1.0f / 255.0f) * 3.0f * 16.0f; - static const float kErrorToleranceGray = (1.0f / 255.0f) * (1.0f / 255.0f) * 1.0f * 16.0f; - - switch(m_errormetric) - { - case ErrorMetric::GRAY: - kErrorTolerance = kErrorToleranceGray; - break; - case ErrorMetric::NUMERIC: - kErrorTolerance = kErrorToleranceNumeric; - break; - case ErrorMetric::REC709: - kErrorTolerance = kErrorToleranceRec709; - break; - } -#endif - - assert(m_fError >= 0.0f); - if (m_fError <= kErrorTolerance) - { - m_boolDone = true; - } - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc - +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding.cpp + +Block4x4Encoding is the abstract base class for the different encoders. Each encoder targets a +particular file format (e.g. 
ETC1, RGB8, RGBA8, R11) + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include +#include +#include + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding::Block4x4Encoding(void) + { + Init(); + } + + void Block4x4Encoding::Init() + { + m_pblockParent = nullptr; + + m_pafrgbaSource = nullptr; + + m_fError = 0.0f; + + m_mode = MODE_UNKNOWN; + + m_uiEncodingIterations = 0; + m_boolDone = false; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(0.0f, 0.0f, 0.0f, 1.0f); + } + } + + // ---------------------------------------------------------------------------------------------------- + // initialize the generic encoding for a 4x4 block + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // init the decoded pixels to -1 to mark them as undefined + // init the error to -1 to mark it as undefined + // + void Block4x4Encoding::Init(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) + { + Init(); + + m_pblockParent = a_pblockParent; + m_pafrgbaSource = a_pafrgbaSource; + m_errormetric = a_errormetric; + + m_uiEncodingIterations = iterationCount; + } + + // ---------------------------------------------------------------------------------------------------- + + void Block4x4Encoding::SetDoneIfPerfect() + { + float kErrorTolerance = 0.0f; + + // instead of comparing to 0 which is almost never achieved in float, + // use a normalized 8-bit tolerance. See A8 and R11 code for kErrorTolerance. 
+#define ADD_TOLERANCE 1 +#if ADD_TOLERANCE + // 16 pixels accumulated, all within 1/255 of final value, and then weights + static const float kErrorToleranceRec709 = (1.0f / 255.0f) * (1.0f / 255.0f) * 5.0f * 16.0f; + static const float kErrorToleranceNumeric = (1.0f / 255.0f) * (1.0f / 255.0f) * 3.0f * 16.0f; + static const float kErrorToleranceGray = (1.0f / 255.0f) * (1.0f / 255.0f) * 1.0f * 16.0f; + + switch(m_errormetric) + { + case ErrorMetric::GRAY: + kErrorTolerance = kErrorToleranceGray; + break; + case ErrorMetric::NUMERIC: + kErrorTolerance = kErrorToleranceNumeric; + break; + case ErrorMetric::REC709: + kErrorTolerance = kErrorToleranceRec709; + break; + } +#endif + + assert(m_fError >= 0.0f); + if (m_fError <= kErrorTolerance) + { + m_boolDone = true; + } + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc + diff --git a/libkram/etc2comp/EtcBlock4x4Encoding.h b/libkram/etc2comp/EtcBlock4x4Encoding.h index 33bfe880..91f0cf6a 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding.h @@ -1,220 +1,220 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcColorFloatRGBA.h" - -#include "EtcErrorMetric.h" - -#include -#include - -namespace Etc -{ - class Block4x4; - - // abstract base class only for rgb/a encodings - class Block4x4Encoding - { - public: - - static const int ROWS = 4; - static const int COLUMNS = 4; - static const int PIXELS = ROWS * COLUMNS; - - typedef enum - { - MODE_UNKNOWN, - // - MODE_ETC1, - MODE_T, - MODE_H, - MODE_PLANAR, - MODES - } Mode; - - Block4x4Encoding(void); - virtual ~Block4x4Encoding(void) {} - - virtual void Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, - ErrorMetric a_errormetric) = 0; - - virtual void Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) = 0; - - // is is only called on S/RGBA format to copy alpha into decoded pixels of encoding - virtual void DecodeAlpha() { } - - // perform an iteration of the encoding - // the first iteration must generate a complete, valid (if poor) encoding - virtual void PerformIteration(float a_fEffort) = 0; - - // write output block - virtual void SetEncodingBits(void) = 0; - - // the count of the last iteration, can be useful in multipass encoding - inline uint16_t GetIterationCount(void) const - { - return m_uiEncodingIterations; - } - - //------------------- - - //void CalcBlockError(void); - //float CalcPixelError(const ColorFloatRGBA& a_frgbaDecodedColor, int uiPixel) const; - - inline float GetError(void) const - { - return m_fError; - } - - inline Mode GetMode(void) const - { - return m_mode; - } - - inline bool IsDone(void) const - { - return m_boolDone; - } - - void SetDoneIfPerfect(); - - - inline const ColorFloatRGBA& GetDecodedPixel(int uiPixel) const - { - return m_afrgbaDecodedColors[uiPixel]; - } - - // CalcPixelError is a major hotspot. Called in inner loops. 
- // calculate the error between the source pixel and the decoded pixel - // the error amount is base on the error metric - inline float CalcPixelError(const ColorFloatRGBA& encodedPixel, - int uiPixel) const - { - - const ColorFloatRGBA& sourcePixel = m_pafrgbaSource[uiPixel]; - float error = 0.0f; - - // don't use alpha in any calcs. This is only RGB error. - - switch(m_errormetric) - { - - case ErrorMetric::GRAY: - { - error = encodedPixel.fR - sourcePixel.fR; - error *= error; - - break; - } - - case ErrorMetric::REC709: - case ErrorMetric::NUMERIC: - { - float fDX = encodedPixel.fR - sourcePixel.fR; - float fDY = encodedPixel.fG - sourcePixel.fG; - float fDZ = encodedPixel.fB - sourcePixel.fB; - - error = fDX*fDX + fDY*fDY + fDZ*fDZ; - break; - } - - /* This slows down encoding 28s vs. 20s when not inlined, so stop using it - also the src isn't cached. - - case ErrorMetric::REC709: - { - //assert(a_fDecodedAlpha >= 0.0f); - - // YCbCr of source and encodedColor - // TODO: could cache sourcePixel values to move out of loops - float fLuma1 = sourcePixel.fR*0.2126f + sourcePixel.fG*0.7152f + sourcePixel.fB*0.0722f; - float fChromaR1 = (sourcePixel.fR - fLuma1) * (0.5f / (1.0f - 0.2126f)); - float fChromaB1 = (sourcePixel.fB - fLuma1) * (0.5f / (1.0f - 0.0722f)); - - float fLuma2 = encodedPixel.fR*0.2126f + encodedPixel.fG*0.7152f + encodedPixel.fB*0.0722f; - float fChromaR2 = (encodedPixel.fR - fLuma2) * (0.5f / (1.0f - 0.2126f)); - float fChromaB2 = (encodedPixel.fB - fLuma2) * (0.5f / (1.0f - 0.0722f)); - - float fDeltaL = fLuma1 - fLuma2; - float fDeltaCr = fChromaR1 - fChromaR2; - float fDeltaCb = fChromaB1 - fChromaB2; - - const float LUMA_WEIGHT = 3.0f; - const float CHROMA_RED_WEIGHT = 1.0f; - const float CHROMA_BLUE_WEIGHT = 1.0f; - - // Favor Luma accuracy over Chroma - error = LUMA_WEIGHT * fDeltaL*fDeltaL + - CHROMA_RED_WEIGHT * fDeltaCr*fDeltaCr + - CHROMA_BLUE_WEIGHT * fDeltaCb*fDeltaCb; - - break; - } - */ - - } - - return error; - } - - // 
CalcBlockError is a major hotspot. Called in inner loops. - // calculate the error for the block by summing the pixel errors - inline void CalcBlockError(void) - { - m_fError = 0.0f; - - if (m_pafrgbaSource) - { - for (int uiPixel = 0; uiPixel < (int)PIXELS; uiPixel++) - { - m_fError += CalcPixelError(m_afrgbaDecodedColors[uiPixel], uiPixel); - } - } - } - - protected: - void Init(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount); - - Block4x4 *m_pblockParent; - const ColorFloatRGBA *m_pafrgbaSource; - - ColorFloatRGBA m_afrgbaDecodedColors[PIXELS]; // decoded RGB components, ignore Alpha - float m_fError; // error for RGB relative to m_pafrgbaSource.rgb - - // intermediate encoding - Mode m_mode; - - unsigned int m_uiEncodingIterations; - bool m_boolDone; // all iterations have been done - ErrorMetric m_errormetric; - - private: - void Init(); - - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +#include "EtcErrorMetric.h" + +#include +#include + +namespace Etc +{ + class Block4x4; + + // abstract base class only for rgb/a encodings + class Block4x4Encoding + { + public: + + static const int ROWS = 4; + static const int COLUMNS = 4; + static const int PIXELS = ROWS * COLUMNS; + + typedef enum + { + MODE_UNKNOWN, + // + MODE_ETC1, + MODE_T, + MODE_H, + MODE_PLANAR, + MODES + } Mode; + + Block4x4Encoding(void); + virtual ~Block4x4Encoding(void) {} + + virtual void Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric) = 0; + + virtual void Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) = 0; + + // is is only called on S/RGBA format to copy alpha into decoded pixels of encoding + virtual void DecodeAlpha() { } + + // perform an iteration of the encoding + // the first iteration must generate a complete, valid (if poor) encoding + virtual void PerformIteration(float a_fEffort) = 0; + + // write output block + virtual void SetEncodingBits(void) = 0; + + // the count of the last iteration, can be useful in multipass encoding + inline uint16_t GetIterationCount(void) const + { + return m_uiEncodingIterations; + } + + //------------------- + + //void CalcBlockError(void); + //float CalcPixelError(const ColorFloatRGBA& a_frgbaDecodedColor, int uiPixel) const; + + inline float GetError(void) const + { + return m_fError; + } + + inline Mode GetMode(void) const + { + return m_mode; + } + + inline bool IsDone(void) const + { + return m_boolDone; + } + + void SetDoneIfPerfect(); + + + inline const ColorFloatRGBA& GetDecodedPixel(int uiPixel) const + { + return m_afrgbaDecodedColors[uiPixel]; + } + + // CalcPixelError is a major hotspot. Called in inner loops. 
+ // calculate the error between the source pixel and the decoded pixel + // the error amount is base on the error metric + inline float CalcPixelError(const ColorFloatRGBA& encodedPixel, + int uiPixel) const + { + + const ColorFloatRGBA& sourcePixel = m_pafrgbaSource[uiPixel]; + float error = 0.0f; + + // don't use alpha in any calcs. This is only RGB error. + + switch(m_errormetric) + { + + case ErrorMetric::GRAY: + { + error = encodedPixel.fR - sourcePixel.fR; + error *= error; + + break; + } + + case ErrorMetric::REC709: + case ErrorMetric::NUMERIC: + { + float fDX = encodedPixel.fR - sourcePixel.fR; + float fDY = encodedPixel.fG - sourcePixel.fG; + float fDZ = encodedPixel.fB - sourcePixel.fB; + + error = fDX*fDX + fDY*fDY + fDZ*fDZ; + break; + } + + /* This slows down encoding 28s vs. 20s when not inlined, so stop using it + also the src isn't cached. + + case ErrorMetric::REC709: + { + //assert(a_fDecodedAlpha >= 0.0f); + + // YCbCr of source and encodedColor + // TODO: could cache sourcePixel values to move out of loops + float fLuma1 = sourcePixel.fR*0.2126f + sourcePixel.fG*0.7152f + sourcePixel.fB*0.0722f; + float fChromaR1 = (sourcePixel.fR - fLuma1) * (0.5f / (1.0f - 0.2126f)); + float fChromaB1 = (sourcePixel.fB - fLuma1) * (0.5f / (1.0f - 0.0722f)); + + float fLuma2 = encodedPixel.fR*0.2126f + encodedPixel.fG*0.7152f + encodedPixel.fB*0.0722f; + float fChromaR2 = (encodedPixel.fR - fLuma2) * (0.5f / (1.0f - 0.2126f)); + float fChromaB2 = (encodedPixel.fB - fLuma2) * (0.5f / (1.0f - 0.0722f)); + + float fDeltaL = fLuma1 - fLuma2; + float fDeltaCr = fChromaR1 - fChromaR2; + float fDeltaCb = fChromaB1 - fChromaB2; + + const float LUMA_WEIGHT = 3.0f; + const float CHROMA_RED_WEIGHT = 1.0f; + const float CHROMA_BLUE_WEIGHT = 1.0f; + + // Favor Luma accuracy over Chroma + error = LUMA_WEIGHT * fDeltaL*fDeltaL + + CHROMA_RED_WEIGHT * fDeltaCr*fDeltaCr + + CHROMA_BLUE_WEIGHT * fDeltaCb*fDeltaCb; + + break; + } + */ + + } + + return error; + } + + // 
CalcBlockError is a major hotspot. Called in inner loops. + // calculate the error for the block by summing the pixel errors + inline void CalcBlockError(void) + { + m_fError = 0.0f; + + if (m_pafrgbaSource) + { + for (int uiPixel = 0; uiPixel < (int)PIXELS; uiPixel++) + { + m_fError += CalcPixelError(m_afrgbaDecodedColors[uiPixel], uiPixel); + } + } + } + + protected: + void Init(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount); + + Block4x4 *m_pblockParent; + const ColorFloatRGBA *m_pafrgbaSource; + + ColorFloatRGBA m_afrgbaDecodedColors[PIXELS]; // decoded RGB components, ignore Alpha + float m_fError; // error for RGB relative to m_pafrgbaSource.rgb + + // intermediate encoding + Mode m_mode; + + unsigned int m_uiEncodingIterations; + bool m_boolDone; // all iterations have been done + ErrorMetric m_errormetric; + + private: + void Init(); + + }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4EncodingBits.h b/libkram/etc2comp/EtcBlock4x4EncodingBits.h index d0267978..e0b30ae9 100644 --- a/libkram/etc2comp/EtcBlock4x4EncodingBits.h +++ b/libkram/etc2comp/EtcBlock4x4EncodingBits.h @@ -1,315 +1,315 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace Etc -{ - - // ################################################################################ - // Block4x4EncodingBits - // Base class for Block4x4EncodingBits_XXXX - // ################################################################################ - - class Block4x4EncodingBits - { - public: - - enum class Format - { - UNKNOWN, - // - RGB8, - RGBA8, - R11, - RG11, - RGB8A1, - // - FORMATS - }; - - static unsigned int GetBytesPerBlock(Format a_format) - { - switch (a_format) - { - case Block4x4EncodingBits::Format::RGB8: - case Block4x4EncodingBits::Format::R11: - case Block4x4EncodingBits::Format::RGB8A1: - return 8; - break; - - case Block4x4EncodingBits::Format::RGBA8: - case Block4x4EncodingBits::Format::RG11: - return 16; - break; - - default: - return 0; - break; - } - - } - - }; - - // ################################################################################ - // Block4x4EncodingBits_RGB8 - // Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8 - // ################################################################################ - - class Block4x4EncodingBits_RGB8 - { - public: - - static const unsigned int BYTES_PER_BLOCK = 8; - - inline Block4x4EncodingBits_RGB8(void) - { - assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK); - - for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++) - { - auc[uiByte] = 0; - } - - } - - typedef struct - { - unsigned red2 : 4; - unsigned red1 : 4; - // - unsigned green2 : 4; - unsigned green1 : 4; - // - unsigned blue2 : 4; - unsigned blue1 : 4; - // - unsigned flip : 1; - unsigned diff : 1; - unsigned cw2 : 3; - unsigned cw1 : 3; - // - unsigned int selectors; - } Individual; - - typedef struct - { - signed dred2 : 3; - unsigned red1 : 5; - // - signed dgreen2 : 3; - unsigned green1 : 5; - // - signed dblue2 : 3; - unsigned blue1 : 5; - // - unsigned flip : 1; - unsigned diff : 1; - unsigned cw2 : 3; - unsigned cw1 : 3; - // - 
unsigned int selectors; - } Differential; - - typedef struct - { - unsigned red1b : 2; - unsigned detect2 : 1; - unsigned red1a : 2; - unsigned detect1 : 3; - // - unsigned blue1 : 4; - unsigned green1 : 4; - // - unsigned green2 : 4; - unsigned red2 : 4; - // - unsigned db : 1; - unsigned diff : 1; - unsigned da : 2; - unsigned blue2 : 4; - // - unsigned int selectors; - } T; - - typedef struct - { - unsigned green1a : 3; - unsigned red1 : 4; - unsigned detect1 : 1; - // - unsigned blue1b : 2; - unsigned detect3 : 1; - unsigned blue1a : 1; - unsigned green1b : 1; - unsigned detect2 : 3; - // - unsigned green2a : 3; - unsigned red2 : 4; - unsigned blue1c : 1; - // - unsigned db : 1; - unsigned diff : 1; - unsigned da : 1; - unsigned blue2 : 4; - unsigned green2b : 1; - // - unsigned int selectors; - } H; - - typedef struct - { - unsigned originGreen1 : 1; - unsigned originRed : 6; - unsigned detect1 : 1; - // - unsigned originBlue1 : 1; - unsigned originGreen2 : 6; - unsigned detect2 : 1; - // - unsigned originBlue3 : 2; - unsigned detect4 : 1; - unsigned originBlue2 : 2; - unsigned detect3 : 3; - // - unsigned horizRed2 : 1; - unsigned diff : 1; - unsigned horizRed1 : 5; - unsigned originBlue4 : 1; - // - unsigned horizBlue1: 1; - unsigned horizGreen : 7; - // - unsigned vertRed1 : 3; - unsigned horizBlue2 : 5; - // - unsigned vertGreen1 : 5; - unsigned vertRed2 : 3; - // - unsigned vertBlue : 6; - unsigned vertGreen2 : 2; - } Planar; - - union - { - unsigned char auc[BYTES_PER_BLOCK]; - unsigned long int ul; - Individual individual; - Differential differential; - T t; - H h; - Planar planar; - }; - - }; - - // ################################################################################ - // Block4x4EncodingBits_A8 - // Encoding bits for the A portion of RGBA8 - // ################################################################################ - - class Block4x4EncodingBits_A8 - { - public: - - static const unsigned int BYTES_PER_BLOCK = 8; - static const 
unsigned int SELECTOR_BYTES = 6; - - typedef struct - { - unsigned base : 8; - unsigned table : 4; - unsigned multiplier : 4; - unsigned selectors0 : 8; - unsigned selectors1 : 8; - unsigned selectors2 : 8; - unsigned selectors3 : 8; - unsigned selectors4 : 8; - unsigned selectors5 : 8; - } Data; - - Data data; - - }; - - // ################################################################################ - // Block4x4EncodingBits_R11 - // Encoding bits for the R portion of R11 - // ################################################################################ - - class Block4x4EncodingBits_R11 - { - public: - - static const unsigned int BYTES_PER_BLOCK = 8; - static const unsigned int SELECTOR_BYTES = 6; - - typedef struct - { - unsigned base : 8; - unsigned table : 4; - unsigned multiplier : 4; - unsigned selectors0 : 8; - unsigned selectors1 : 8; - unsigned selectors2 : 8; - unsigned selectors3 : 8; - unsigned selectors4 : 8; - unsigned selectors5 : 8; - } Data; - - Data data; - - }; - - class Block4x4EncodingBits_RG11 - { - public: - - static const unsigned int BYTES_PER_BLOCK = 16; - static const unsigned int SELECTOR_BYTES = 12; - - typedef struct - { - //Red portion - unsigned baseR : 8; - unsigned tableIndexR : 4; - unsigned multiplierR : 4; - unsigned selectorsR0 : 8; - unsigned selectorsR1 : 8; - unsigned selectorsR2 : 8; - unsigned selectorsR3 : 8; - unsigned selectorsR4 : 8; - unsigned selectorsR5 : 8; - //Green portion - unsigned baseG : 8; - unsigned tableIndexG : 4; - unsigned multiplierG : 4; - unsigned selectorsG0 : 8; - unsigned selectorsG1 : 8; - unsigned selectorsG2 : 8; - unsigned selectorsG3 : 8; - unsigned selectorsG4 : 8; - unsigned selectorsG5 : 8; - } Data; - - Data data; - - }; - -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace Etc +{ + + // ################################################################################ + // Block4x4EncodingBits + // Base class for Block4x4EncodingBits_XXXX + // ################################################################################ + + class Block4x4EncodingBits + { + public: + + enum class Format + { + UNKNOWN, + // + RGB8, + RGBA8, + R11, + RG11, + RGB8A1, + // + FORMATS + }; + + static unsigned int GetBytesPerBlock(Format a_format) + { + switch (a_format) + { + case Block4x4EncodingBits::Format::RGB8: + case Block4x4EncodingBits::Format::R11: + case Block4x4EncodingBits::Format::RGB8A1: + return 8; + break; + + case Block4x4EncodingBits::Format::RGBA8: + case Block4x4EncodingBits::Format::RG11: + return 16; + break; + + default: + return 0; + break; + } + + } + + }; + + // ################################################################################ + // Block4x4EncodingBits_RGB8 + // Encoding bits for the RGB portion of ETC1, RGB8, RGB8A1 and RGBA8 + // ################################################################################ + + class Block4x4EncodingBits_RGB8 + { + public: + + static const unsigned int BYTES_PER_BLOCK = 8; + + inline Block4x4EncodingBits_RGB8(void) + { + assert(sizeof(Block4x4EncodingBits_RGB8) == BYTES_PER_BLOCK); + + for (unsigned int uiByte = 0; uiByte < BYTES_PER_BLOCK; uiByte++) + { + auc[uiByte] = 0; + } + + } + + typedef struct + { + unsigned red2 : 4; + unsigned red1 : 4; + // + unsigned green2 : 4; + unsigned green1 : 4; + // + unsigned 
blue2 : 4; + unsigned blue1 : 4; + // + unsigned flip : 1; + unsigned diff : 1; + unsigned cw2 : 3; + unsigned cw1 : 3; + // + unsigned int selectors; + } Individual; + + typedef struct + { + signed dred2 : 3; + unsigned red1 : 5; + // + signed dgreen2 : 3; + unsigned green1 : 5; + // + signed dblue2 : 3; + unsigned blue1 : 5; + // + unsigned flip : 1; + unsigned diff : 1; + unsigned cw2 : 3; + unsigned cw1 : 3; + // + unsigned int selectors; + } Differential; + + typedef struct + { + unsigned red1b : 2; + unsigned detect2 : 1; + unsigned red1a : 2; + unsigned detect1 : 3; + // + unsigned blue1 : 4; + unsigned green1 : 4; + // + unsigned green2 : 4; + unsigned red2 : 4; + // + unsigned db : 1; + unsigned diff : 1; + unsigned da : 2; + unsigned blue2 : 4; + // + unsigned int selectors; + } T; + + typedef struct + { + unsigned green1a : 3; + unsigned red1 : 4; + unsigned detect1 : 1; + // + unsigned blue1b : 2; + unsigned detect3 : 1; + unsigned blue1a : 1; + unsigned green1b : 1; + unsigned detect2 : 3; + // + unsigned green2a : 3; + unsigned red2 : 4; + unsigned blue1c : 1; + // + unsigned db : 1; + unsigned diff : 1; + unsigned da : 1; + unsigned blue2 : 4; + unsigned green2b : 1; + // + unsigned int selectors; + } H; + + typedef struct + { + unsigned originGreen1 : 1; + unsigned originRed : 6; + unsigned detect1 : 1; + // + unsigned originBlue1 : 1; + unsigned originGreen2 : 6; + unsigned detect2 : 1; + // + unsigned originBlue3 : 2; + unsigned detect4 : 1; + unsigned originBlue2 : 2; + unsigned detect3 : 3; + // + unsigned horizRed2 : 1; + unsigned diff : 1; + unsigned horizRed1 : 5; + unsigned originBlue4 : 1; + // + unsigned horizBlue1: 1; + unsigned horizGreen : 7; + // + unsigned vertRed1 : 3; + unsigned horizBlue2 : 5; + // + unsigned vertGreen1 : 5; + unsigned vertRed2 : 3; + // + unsigned vertBlue : 6; + unsigned vertGreen2 : 2; + } Planar; + + union + { + unsigned char auc[BYTES_PER_BLOCK]; + unsigned long int ul; + Individual individual; + Differential 
differential; + T t; + H h; + Planar planar; + }; + + }; + + // ################################################################################ + // Block4x4EncodingBits_A8 + // Encoding bits for the A portion of RGBA8 + // ################################################################################ + + class Block4x4EncodingBits_A8 + { + public: + + static const unsigned int BYTES_PER_BLOCK = 8; + static const unsigned int SELECTOR_BYTES = 6; + + typedef struct + { + unsigned base : 8; + unsigned table : 4; + unsigned multiplier : 4; + unsigned selectors0 : 8; + unsigned selectors1 : 8; + unsigned selectors2 : 8; + unsigned selectors3 : 8; + unsigned selectors4 : 8; + unsigned selectors5 : 8; + } Data; + + Data data; + + }; + + // ################################################################################ + // Block4x4EncodingBits_R11 + // Encoding bits for the R portion of R11 + // ################################################################################ + + class Block4x4EncodingBits_R11 + { + public: + + static const unsigned int BYTES_PER_BLOCK = 8; + static const unsigned int SELECTOR_BYTES = 6; + + typedef struct + { + unsigned base : 8; + unsigned table : 4; + unsigned multiplier : 4; + unsigned selectors0 : 8; + unsigned selectors1 : 8; + unsigned selectors2 : 8; + unsigned selectors3 : 8; + unsigned selectors4 : 8; + unsigned selectors5 : 8; + } Data; + + Data data; + + }; + + class Block4x4EncodingBits_RG11 + { + public: + + static const unsigned int BYTES_PER_BLOCK = 16; + static const unsigned int SELECTOR_BYTES = 12; + + typedef struct + { + //Red portion + unsigned baseR : 8; + unsigned tableIndexR : 4; + unsigned multiplierR : 4; + unsigned selectorsR0 : 8; + unsigned selectorsR1 : 8; + unsigned selectorsR2 : 8; + unsigned selectorsR3 : 8; + unsigned selectorsR4 : 8; + unsigned selectorsR5 : 8; + //Green portion + unsigned baseG : 8; + unsigned tableIndexG : 4; + unsigned multiplierG : 4; + unsigned selectorsG0 : 8; + unsigned 
selectorsG1 : 8; + unsigned selectorsG2 : 8; + unsigned selectorsG3 : 8; + unsigned selectorsG4 : 8; + unsigned selectorsG5 : 8; + } Data; + + Data data; + + }; + +} diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.cpp index 360e302e..8de857f4 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.cpp @@ -1,1335 +1,1335 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_ETC1.cpp - -Block4x4Encoding_ETC1 is the encoder to use when targetting file format ETC1. 
This encoder is also -used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1 - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_ETC1.h" - -#include "EtcBlock4x4.h" -#include "EtcBlock4x4EncodingBits.h" -#include "EtcDifferentialTrys.h" - -#include -#include -#include -#include -#include - -namespace Etc -{ - - // pixel processing order if the flip bit = 0 (horizontal split) - const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - - // pixel processing order if the flip bit = 1 (vertical split) - const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }; - - // pixel processing order for horizontal scan (ETC normally does a vertical scan) - const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; - - // pixel indices for different block halves - const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; - const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; - const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 }; - const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 }; - - // CW ranges that the ETC1 decoders use - // CW is basically a contrast for the different selector bits, since these values are offsets to the base color - // the first axis in the array is indexed by the CW in the encoding bits - // the second axis in the array is indexed by the selector bits - float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] = - { - { 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f }, - { 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f }, - { 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f }, - { 13.0f / 
255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f }, - { 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f }, - { 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f }, - { 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f }, - { 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f } - }; - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void) - { - InitETC1(); - } - - Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {} - - void Block4x4Encoding_ETC1::InitETC1() - { - m_mode = MODE_ETC1; - m_boolDiff = false; - m_boolFlip = false; - m_frgbaColor1 = ColorFloatRGBA(); - m_frgbaColor2 = ColorFloatRGBA(); - m_uiCW1 = 0; - m_uiCW2 = 0; - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = 0; - //m_afDecodedAlphas[uiPixel] = 1.0f; - } - - // these aren't initialized - m_frgbaSourceAverageLeft = ColorFloatRGBA(); - m_frgbaSourceAverageRight = ColorFloatRGBA(); - m_frgbaSourceAverageTop = ColorFloatRGBA(); - m_frgbaSourceAverageBottom = ColorFloatRGBA(); - - m_boolMostLikelyFlip = false; - - m_fError = -1.0f; - m_fError1 = -1.0f; - m_fError2 = -1.0f; - m_boolSeverelyBentDifferentialColors = false; - } - - // ---------------------------------------------------------------------------------------------------- - // initialization prior to encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits - // - void Block4x4Encoding_ETC1::Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) - { - - // call ctor doesn't work, so call InitETC1 - //Block4x4Encoding_ETC1(); - InitETC1(); - - 
Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, 0); - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits); - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits of a previous encoding - // - void Block4x4Encoding_ETC1::Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) - { - // this can't hurt - InitETC1(); - - Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, iterationCount); - m_fError = -1.0f; - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; - - m_mode = MODE_ETC1; - m_boolDiff = m_pencodingbitsRGB8->individual.diff; - m_boolFlip = m_pencodingbitsRGB8->individual.flip; - if (m_boolDiff) - { - int iR2 = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2); - int iG2 = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2); - int iB2 = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2); - - if (iR2 < 0) - { - iR2 = 0; - } - else if (iR2 > 31) - { - iR2 = 31; - } - - if (iG2 < 0) - { - iG2 = 0; - } - else if (iG2 > 31) - { - iG2 = 31; - } - - if (iB2 < 0) - { - iB2 = 0; - } - else if (iB2 > 31) - { - iB2 = 31; - } - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, 
(unsigned char)iB2); - - } - else - { - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1, m_pencodingbitsRGB8->individual.green1, m_pencodingbitsRGB8->individual.blue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2, m_pencodingbitsRGB8->individual.green2, m_pencodingbitsRGB8->individual.blue2); - } - - m_uiCW1 = m_pencodingbitsRGB8->individual.cw1; - m_uiCW2 = m_pencodingbitsRGB8->individual.cw2; - - InitFromEncodingBits_Selectors(); - - Decode(); - - CalcBlockError(); - } - - // ---------------------------------------------------------------------------------------------------- - // init the selectors from a prior encoding - // - void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void) - { - - unsigned char *paucSelectors = (unsigned char *)&m_pencodingbitsRGB8->individual.selectors; - - for (unsigned int iPixel = 0; iPixel < PIXELS; iPixel++) - { - unsigned int uiByteMSB = (unsigned int)(1 - (iPixel / 8)); - unsigned int uiByteLSB = (unsigned int)(3 - (iPixel / 8)); - unsigned int uiShift = (unsigned int)(iPixel & 7); - - unsigned int uiSelectorMSB = (unsigned int)((paucSelectors[uiByteMSB] >> uiShift) & 1); - unsigned int uiSelectorLSB = (unsigned int)((paucSelectors[uiByteLSB] >> uiShift) & 1); - - m_auiSelectors[iPixel] = (uiSelectorMSB << 1) + uiSelectorLSB; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort - // - void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort) - { - assert(!m_boolDone); - - switch (m_uiEncodingIterations) - { - case 0: - PerformFirstIteration(); - break; - - case 1: - TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); - break; - - 
case 2: - TryIndividual(m_boolMostLikelyFlip, 1); - if (a_fEffort <= 49.5f) - { - m_boolDone = true; - } - break; - - case 3: - TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); - if (a_fEffort <= 59.5f) - { - m_boolDone = true; - } - break; - - case 4: - TryIndividual(!m_boolMostLikelyFlip, 1); - if (a_fEffort <= 69.5f) - { - m_boolDone = true; - } - break; - - case 5: - TryDegenerates1(); - if (a_fEffort <= 79.5f) - { - m_boolDone = true; - } - break; - - case 6: - TryDegenerates2(); - if (a_fEffort <= 89.5f) - { - m_boolDone = true; - } - break; - - case 7: - TryDegenerates3(); - if (a_fEffort <= 99.5f) - { - m_boolDone = true; - } - break; - - case 8: - TryDegenerates4(); - m_boolDone = true; - break; - - default: - assert(0); - break; - } - - m_uiEncodingIterations++; - SetDoneIfPerfect(); - } - - // ---------------------------------------------------------------------------------------------------- - // find best initial encoding to ensure block has a valid encoding - // - void Block4x4Encoding_ETC1::PerformFirstIteration(void) - { - CalculateMostLikelyFlip(); - - m_fError = FLT_MAX; - - TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - - TryIndividual(m_boolMostLikelyFlip, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - TryIndividual(!m_boolMostLikelyFlip, 0); - - } - - // ---------------------------------------------------------------------------------------------------- - // algorithm: - // create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half - // note: the "gray line" is the line of equal delta RGB that goes thru the average color - // for each half: - // see how close each of the 8 pixels are to the "gray line" that goes thru the source average color - // create an error value that is the sum of the distances from the gray 
line - // h_error is the sum of Left and Right errors - // v_error is the sum of Top and Bottom errors - // - void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void) - { - static const bool DEBUG_PRINT = false; - - CalculateSourceAverages(); - - float fLeftGrayErrorSum = 0.0f; - float fRightGrayErrorSum = 0.0f; - float fTopGrayErrorSum = 0.0f; - float fBottomGrayErrorSum = 0.0f; - - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - const ColorFloatRGBA *pfrgbaLeft = &m_pafrgbaSource[uiPixel]; - const ColorFloatRGBA *pfrgbaRight = &m_pafrgbaSource[uiPixel + 8]; - const ColorFloatRGBA *pfrgbaTop = &m_pafrgbaSource[s_auiTopPixelMapping[uiPixel]]; - const ColorFloatRGBA *pfrgbaBottom = &m_pafrgbaSource[s_auiBottomPixelMapping[uiPixel]]; - - float fLeftGrayError = CalcGrayDistance2(*pfrgbaLeft, m_frgbaSourceAverageLeft); - float fRightGrayError = CalcGrayDistance2(*pfrgbaRight, m_frgbaSourceAverageRight); - float fTopGrayError = CalcGrayDistance2(*pfrgbaTop, m_frgbaSourceAverageTop); - float fBottomGrayError = CalcGrayDistance2(*pfrgbaBottom, m_frgbaSourceAverageBottom); - - fLeftGrayErrorSum += fLeftGrayError; - fRightGrayErrorSum += fRightGrayError; - fTopGrayErrorSum += fTopGrayError; - fBottomGrayErrorSum += fBottomGrayError; - } - - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\n%.2f %.2f\n", fLeftGrayErrorSum + fRightGrayErrorSum, fTopGrayErrorSum + fBottomGrayErrorSum); - } - - m_boolMostLikelyFlip = (fTopGrayErrorSum + fBottomGrayErrorSum) < (fLeftGrayErrorSum + fRightGrayErrorSum); - - } - - // ---------------------------------------------------------------------------------------------------- - // calculate source pixel averages for each 2x2 quadrant in a 4x4 block - // these are used to determine the averages for each of the 4 different halves (left, right, top, bottom) - // ignore pixels that have alpha == NAN (these are border pixels outside of the source image) - // weight the averages based on a pixel's alpha - // - void 
Block4x4Encoding_ETC1::CalculateSourceAverages(void) - { - static const bool DEBUG_PRINT = false; - -// bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX; -// -// if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE || boolRGBX) - { - ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5]; - ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7]; - ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13]; - ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15]; - - // aveerage value of 8 pixels for each of the 4 corners - m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f; - m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f; - m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f; - m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f; - - // * doesn't multiply fA above, it calls ScaleRGB, so a = 8 - m_frgbaSourceAverageLeft.fA = 1.0f; - m_frgbaSourceAverageRight.fA = 1.0f; - m_frgbaSourceAverageTop.fA = 1.0f; - m_frgbaSourceAverageBottom.fA = 1.0f; - } -// else -// { -// float afSourceAlpha[PIXELS]; -// -// // treat alpha NAN as 0.0f -// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) -// { -// afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
-// 0.0f : -// m_pafrgbaSource[uiPixel].fA; -// } -// -// ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS]; -// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) -// { -// afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel]; -// } -// -// ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] + -// afrgbaAlphaWeightedSource[1] + -// afrgbaAlphaWeightedSource[4] + -// afrgbaAlphaWeightedSource[5]; -// -// ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] + -// afrgbaAlphaWeightedSource[3] + -// afrgbaAlphaWeightedSource[6] + -// afrgbaAlphaWeightedSource[7]; -// -// ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] + -// afrgbaAlphaWeightedSource[9] + -// afrgbaAlphaWeightedSource[12] + -// afrgbaAlphaWeightedSource[13]; -// -// ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] + -// afrgbaAlphaWeightedSource[11] + -// afrgbaAlphaWeightedSource[14] + -// afrgbaAlphaWeightedSource[15]; -// -// float fWeightSumUL = afSourceAlpha[0] + -// afSourceAlpha[1] + -// afSourceAlpha[4] + -// afSourceAlpha[5]; -// -// float fWeightSumLL = afSourceAlpha[2] + -// afSourceAlpha[3] + -// afSourceAlpha[6] + -// afSourceAlpha[7]; -// -// float fWeightSumUR = afSourceAlpha[8] + -// afSourceAlpha[9] + -// afSourceAlpha[12] + -// afSourceAlpha[13]; -// -// float fWeightSumLR = afSourceAlpha[10] + -// afSourceAlpha[11] + -// afSourceAlpha[14] + -// afSourceAlpha[15]; -// -// ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL; -// ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR; -// ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR; -// ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR; -// -// float fWeightSumLeft = fWeightSumUL + fWeightSumLL; -// float fWeightSumRight = fWeightSumUR + fWeightSumLR; -// float fWeightSumTop = fWeightSumUL + fWeightSumUR; -// float fWeightSumBottom = fWeightSumLL + fWeightSumLR; -// -// // check to see if there is at least 1 pixel with non-zero alpha -// // completely 
transparent block should not make it to this code -// assert((fWeightSumLeft + fWeightSumRight) > 0.0f); -// assert((fWeightSumTop + fWeightSumBottom) > 0.0f); -// -// if (fWeightSumLeft > 0.0f) -// { -// m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft); -// } -// if (fWeightSumRight > 0.0f) -// { -// m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight); -// } -// if (fWeightSumTop > 0.0f) -// { -// m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop); -// } -// if (fWeightSumBottom > 0.0f) -// { -// m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom); -// } -// -// if (fWeightSumLeft == 0.0f) -// { -// assert(fWeightSumRight > 0.0f); -// m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight; -// } -// if (fWeightSumRight == 0.0f) -// { -// assert(fWeightSumLeft > 0.0f); -// m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft; -// } -// if (fWeightSumTop == 0.0f) -// { -// assert(fWeightSumBottom > 0.0f); -// m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom; -// } -// if (fWeightSumBottom == 0.0f) -// { -// assert(fWeightSumTop > 0.0f); -// m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop; -// } -// } - - - - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n", - m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB, - m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB, - m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB, - m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try an ETC1 differential mode encoding - // use a_boolFlip to set the encoding F bit - // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] - // use a_iGrayOffset1 and 
a_iGrayOffset2 to offset the basecolor to search for degenerate encodings - // replace the encoding if the encoding error is less than previous encoding - // - void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2) - { - - ColorFloatRGBA frgbaColor1; - ColorFloatRGBA frgbaColor2; - - const unsigned int *pauiPixelMapping1; - const unsigned int *pauiPixelMapping2; - - if (a_boolFlip) - { - frgbaColor1 = m_frgbaSourceAverageTop; - frgbaColor2 = m_frgbaSourceAverageBottom; - - pauiPixelMapping1 = s_auiTopPixelMapping; - pauiPixelMapping2 = s_auiBottomPixelMapping; - } - else - { - frgbaColor1 = m_frgbaSourceAverageLeft; - frgbaColor2 = m_frgbaSourceAverageRight; - - pauiPixelMapping1 = s_auiLeftPixelMapping; - pauiPixelMapping2 = s_auiRightPixelMapping; - } - - DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, - a_uiRadius, a_iGrayOffset1, a_iGrayOffset2); - - Block4x4Encoding_ETC1 encodingTry = *this; - encodingTry.m_boolFlip = a_boolFlip; - - encodingTry.TryDifferentialHalf(&trys.m_half1); - encodingTry.TryDifferentialHalf(&trys.m_half2); - - // find best halves that are within differential range - DifferentialTrys::Try *ptryBest1 = nullptr; - DifferentialTrys::Try *ptryBest2 = nullptr; - encodingTry.m_fError = FLT_MAX; - - // see if the best of each half are in differential range - int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed; - int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen; - int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue; - if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3) - { - ptryBest1 = trys.m_half1.m_ptryBest; - ptryBest2 = trys.m_half2.m_ptryBest; - encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; - } - else - { - // else, find the next best halves that are in 
differential range - for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0]; - ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys]; - ptry1++) - { - for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0]; - ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys]; - ptry2++) - { - iDRed = ptry2->m_iRed - ptry1->m_iRed; - bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4; - iDGreen = ptry2->m_iGreen - ptry1->m_iGreen; - bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4; - iDBlue = ptry2->m_iBlue - ptry1->m_iBlue; - bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4; - - if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta) - { - float fError = ptry1->m_fError + ptry2->m_fError; - - if (fError < encodingTry.m_fError) - { - encodingTry.m_fError = fError; - - ptryBest1 = ptry1; - ptryBest2 = ptry2; - } - } - - } - } - assert(encodingTry.m_fError < FLT_MAX); - assert(ptryBest1 != nullptr); - assert(ptryBest2 != nullptr); - } - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_ETC1; - m_boolDiff = true; - m_boolFlip = encodingTry.m_boolFlip; - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); - m_uiCW1 = ptryBest1->m_uiCW; - m_uiCW2 = ptryBest2->m_uiCW; - - for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) - { - unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; - unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; - - unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; - unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; - - m_auiSelectors[uiPixel1] = uiSelector1; - m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; - - float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; - float 
fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; - - m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); - m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); - } - - m_fError1 = ptryBest1->m_fError; - m_fError2 = ptryBest2->m_fError; - m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; - m_fError = m_fError1 + m_fError2; - - // sanity check - { - int iRed1 = m_frgbaColor1.IntRed(31.0f); - int iGreen1 = m_frgbaColor1.IntGreen(31.0f); - int iBlue1 = m_frgbaColor1.IntBlue(31.0f); - - int iRed2 = m_frgbaColor2.IntRed(31.0f); - int iGreen2 = m_frgbaColor2.IntGreen(31.0f); - int iBlue2 = m_frgbaColor2.IntBlue(31.0f); - - iDRed = iRed2 - iRed1; - iDGreen = iGreen2 - iGreen1; - iDBlue = iBlue2 - iBlue1; - - assert(iDRed >= -4 && iDRed < 4); - assert(iDGreen >= -4 && iDGreen < 4); - assert(iDBlue >= -4 && iDBlue < 4); - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try an ETC1 differential mode encoding for a half of a 4x4 block - // vary the basecolor components using a radius - // - void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) - { - - a_phalf->m_ptryBest = nullptr; - float fBestTryError = FLT_MAX; - - a_phalf->m_uiTrys = 0; - - int radius = (int)a_phalf->m_uiRadius; - int radiusGB = radius; - - // Only iterate one color on all grayscale, otherwise this picks a red color when - // encoding grayscale images, since it stops on an early iteration of red. - - // TODO: Why is grayscale image stopping on a early red radius iteration? - // Maybe error equal, but this doesn't prefer 0 radius result on equality. - // Can happen if metric isn't gray, so fix this. 
- - bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); - if (isGray) - { - // drop out green/blue iteration - radiusGB = 0; - } - - for (int iRed = a_phalf->m_iRed - radius; - iRed <= a_phalf->m_iRed + radius; - iRed++) - { - assert(iRed >= 0 && iRed <= 31); - - for (int iGreen = a_phalf->m_iGreen - radiusGB; - iGreen <= a_phalf->m_iGreen + radiusGB; - iGreen++) - { - assert(iGreen >= 0 && iGreen <= 31); - - for (int iBlue = a_phalf->m_iBlue - radiusGB; - iBlue <= a_phalf->m_iBlue + radiusGB; - iBlue++) - { - assert(iBlue >= 0 && iBlue <= 31); - - DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; - assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); - - if (isGray) - { - ptry->m_iRed = iRed; - ptry->m_iGreen = iRed; - ptry->m_iBlue = iRed; - } - else - { - ptry->m_iRed = iRed; - ptry->m_iGreen = iGreen; - ptry->m_iBlue = iBlue; - } - - ptry->m_fError = FLT_MAX; - ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptry->m_iRed, (unsigned char)ptry->m_iGreen, (unsigned char)ptry->m_iBlue); - - // try each CW - for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) - { - unsigned int auiPixelSelectors[PIXELS / 2]; - ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; - float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - - // pre-compute decoded pixels for each selector - ColorFloatRGBA afrgbaSelectors[SELECTORS]; - assert(SELECTORS == 4); - afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); - afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB(); - afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); - afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); - - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; - - ColorFloatRGBA frgbaDecodedPixel; - - for (unsigned int uiSelector = 0; uiSelector < 
SELECTORS; uiSelector++) - { - frgbaDecodedPixel = afrgbaSelectors[uiSelector]; - - float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); - - if (fPixelError < afPixelErrors[uiPixel]) - { - auiPixelSelectors[uiPixel] = uiSelector; - afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; - afPixelErrors[uiPixel] = fPixelError; - } - - } - } - - // add up all pixel errors - float fCWError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - fCWError += afPixelErrors[uiPixel]; - } - - // if best CW so far - if (fCWError < ptry->m_fError) - { - ptry->m_uiCW = uiCW; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; - } - ptry->m_fError = fCWError; - } - - } - - if (ptry->m_fError < fBestTryError) - { - a_phalf->m_ptryBest = ptry; - fBestTryError = ptry->m_fError; - } - - assert(ptry->m_fError < FLT_MAX); - - a_phalf->m_uiTrys++; - } - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try an ETC1 individual mode encoding - // use a_boolFlip to set the encoding F bit - // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] - // replace the encoding if the encoding error is less than previous encoding - // - void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius) - { - - ColorFloatRGBA frgbaColor1; - ColorFloatRGBA frgbaColor2; - - const unsigned int *pauiPixelMapping1; - const unsigned int *pauiPixelMapping2; - - if (a_boolFlip) - { - frgbaColor1 = m_frgbaSourceAverageTop; - frgbaColor2 = m_frgbaSourceAverageBottom; - - pauiPixelMapping1 = s_auiTopPixelMapping; - pauiPixelMapping2 = s_auiBottomPixelMapping; - } - else - { - frgbaColor1 = m_frgbaSourceAverageLeft; - frgbaColor2 = m_frgbaSourceAverageRight; - - pauiPixelMapping1 = s_auiLeftPixelMapping; - pauiPixelMapping2 = s_auiRightPixelMapping; - } - - IndividualTrys trys(frgbaColor1, 
frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, a_uiRadius); - - Block4x4Encoding_ETC1 encodingTry = *this; - encodingTry.m_boolFlip = a_boolFlip; - - encodingTry.TryIndividualHalf(&trys.m_half1); - encodingTry.TryIndividualHalf(&trys.m_half2); - - // use the best of each half - IndividualTrys::Try *ptryBest1 = trys.m_half1.m_ptryBest; - IndividualTrys::Try *ptryBest2 = trys.m_half2.m_ptryBest; - encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_ETC1; - m_boolDiff = false; - m_boolFlip = encodingTry.m_boolFlip; - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); - m_uiCW1 = ptryBest1->m_uiCW; - m_uiCW2 = ptryBest2->m_uiCW; - - for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) - { - unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; - unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; - - unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; - unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; - - m_auiSelectors[uiPixel1] = uiSelector1; - m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; - - float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; - float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; - - m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); - m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); - } - - m_fError1 = ptryBest1->m_fError; - m_fError2 = ptryBest2->m_fError; - m_fError = m_fError1 + m_fError2; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try an ETC1 differential mode 
encoding for a half of a 4x4 block - // vary the basecolor components using a radius - // - void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf) - { - - a_phalf->m_ptryBest = nullptr; - float fBestTryError = FLT_MAX; - - a_phalf->m_uiTrys = 0; - - int radius = (int)a_phalf->m_uiRadius; - int radiusGB = radius; - - // only iterate one color on grayscale - // Note: this won't work for color images with gray blocks - bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); - if (isGray) - { - radiusGB = 0; - } - - for (int iRed = a_phalf->m_iRed - radius; - iRed <= a_phalf->m_iRed + radius; - iRed++) - { - assert(iRed >= 0 && iRed <= 15); - - for (int iGreen = a_phalf->m_iGreen - radiusGB; - iGreen <= a_phalf->m_iGreen + radiusGB; - iGreen++) - { - assert(iGreen >= 0 && iGreen <= 15); - - for (int iBlue = a_phalf->m_iBlue - radiusGB; - iBlue <= a_phalf->m_iBlue + radiusGB; - iBlue++) - { - assert(iBlue >= 0 && iBlue <= 15); - - IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; - assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]); - - if (isGray) - { - ptry->m_iRed = iRed; - ptry->m_iGreen = iRed; - ptry->m_iBlue = iRed; - } - else - { - ptry->m_iRed = iRed; - ptry->m_iGreen = iGreen; - ptry->m_iBlue = iBlue; - } - - ptry->m_fError = FLT_MAX; - ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptry->m_iRed, (unsigned char)ptry->m_iGreen, (unsigned char)ptry->m_iBlue); - - // try each CW - for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) - { - unsigned int auiPixelSelectors[PIXELS / 2]; - ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; - float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - - // pre-compute decoded pixels for each selector - ColorFloatRGBA afrgbaSelectors[SELECTORS]; - assert(SELECTORS == 4); - afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); - afrgbaSelectors[1] = (frgbaColor 
+ s_aafCwTable[uiCW][1]).ClampRGB(); - afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); - afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); - - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; - //const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[srcPixelIndex]; - ColorFloatRGBA frgbaDecodedPixel; - - for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - frgbaDecodedPixel = afrgbaSelectors[uiSelector]; - - float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); - - if (fPixelError < afPixelErrors[uiPixel]) - { - auiPixelSelectors[uiPixel] = uiSelector; - afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; - afPixelErrors[uiPixel] = fPixelError; - } - - } - } - - // add up all pixel errors - float fCWError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - fCWError += afPixelErrors[uiPixel]; - } - - // if best CW so far - if (fCWError < ptry->m_fError) - { - ptry->m_uiCW = uiCW; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; - } - ptry->m_fError = fCWError; - } - - } - - if (ptry->m_fError < fBestTryError) - { - a_phalf->m_ptryBest = ptry; - fBestTryError = ptry->m_fError; - } - - assert(ptry->m_fError < FLT_MAX); - - a_phalf->m_uiTrys++; - } - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 1 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_ETC1::TryDegenerates1(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); - 
TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); - TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 2 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_ETC1::TryDegenerates2(void) - { - - TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); - TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); - TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); - TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 3 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_ETC1::TryDegenerates3(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); - TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); - TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); - TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 4 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_ETC1::TryDegenerates4(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); - TryDifferential(m_boolMostLikelyFlip, 
1, 0, 4); - TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); - - } - - // ---------------------------------------------------------------------------------------------------- - // find the best selector for each pixel based on a particular basecolor and CW that have been previously set - // calculate the selectors for each half of the block separately - // set the block error as the sum of each half's error - // - void Block4x4Encoding_ETC1::CalculateSelectors() - { - if (m_boolFlip) - { - CalculateHalfOfTheSelectors(0, s_auiTopPixelMapping); - CalculateHalfOfTheSelectors(1, s_auiBottomPixelMapping); - } - else - { - CalculateHalfOfTheSelectors(0, s_auiLeftPixelMapping); - CalculateHalfOfTheSelectors(1, s_auiRightPixelMapping); - } - - m_fError = m_fError1 + m_fError2; - } - - // ---------------------------------------------------------------------------------------------------- - // choose best selectors for half of the block - // calculate the error for half of the block - // - void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf, - const unsigned int *pauiPixelMapping) - { - static const bool DEBUG_PRINT = false; - - ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1; - unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1; - - float *pfHalfError = a_uiHalf ? 
&m_fError2 : &m_fError1; - *pfHalfError = FLT_MAX; - - // try each CW - for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) - { - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\ncw=%u\n", uiCW); - } - - unsigned int auiPixelSelectors[PIXELS / 2]; - ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; - float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\tsource [%.2f,%.2f,%.2f]\n", m_pafrgbaSource[pauiPixelMapping[uiPixel]].fR, - m_pafrgbaSource[pauiPixelMapping[uiPixel]].fG, m_pafrgbaSource[pauiPixelMapping[uiPixel]].fB); - } - - int srcPixelIndex = pauiPixelMapping[uiPixel]; - - //const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[pauiPixelMapping[uiPixel]]; - ColorFloatRGBA frgbaDecodedPixel; - - for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - float fDeltaRGB = s_aafCwTable[uiCW][uiSelector]; - - frgbaDecodedPixel = (*pfrgbaColor + fDeltaRGB).ClampRGB(); - - float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); - - bool isBelowError = false; - if (fPixelError < afPixelErrors[uiPixel]) - { - if (DEBUG_PRINT) - { - isBelowError = true; - } - - auiPixelSelectors[uiPixel] = uiSelector; - afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; - afPixelErrors[uiPixel] = fPixelError; - } - - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f%s\n", uiPixel, uiSelector, - frgbaDecodedPixel.fR, - frgbaDecodedPixel.fG, - frgbaDecodedPixel.fB, - fPixelError, - isBelowError ? 
" *": ""); - } - } - } - - // add up all pixel errors - float fCWError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - fCWError += afPixelErrors[uiPixel]; - } - if (DEBUG_PRINT) - { - KLOGI("EtcComp", "\terror %.2f\n", fCWError); - } - - // if best CW so far - if (fCWError < *pfHalfError) - { - *pfHalfError = fCWError; - *puiCW = uiCW; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel]; - m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel]; - } - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state - // - void Block4x4Encoding_ETC1::SetEncodingBits(void) - { - assert(m_mode == MODE_ETC1); - - if (m_boolDiff) - { - int iRed1 = m_frgbaColor1.IntRed(31.0f); - int iGreen1 = m_frgbaColor1.IntGreen(31.0f); - int iBlue1 = m_frgbaColor1.IntBlue(31.0f); - - int iRed2 = m_frgbaColor2.IntRed(31.0f); - int iGreen2 = m_frgbaColor2.IntGreen(31.0f); - int iBlue2 = m_frgbaColor2.IntBlue(31.0f); - - int iDRed2 = iRed2 - iRed1; - int iDGreen2 = iGreen2 - iGreen1; - int iDBlue2 = iBlue2 - iBlue1; - - assert(iDRed2 >= -4 && iDRed2 < 4); - assert(iDGreen2 >= -4 && iDGreen2 < 4); - assert(iDBlue2 >= -4 && iDBlue2 < 4); - - m_pencodingbitsRGB8->differential.red1 = (unsigned int)iRed1; - m_pencodingbitsRGB8->differential.green1 = (unsigned int)iGreen1; - m_pencodingbitsRGB8->differential.blue1 = (unsigned int)iBlue1; - - m_pencodingbitsRGB8->differential.dred2 = iDRed2; - m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; - m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; - } - else - { - m_pencodingbitsRGB8->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); - m_pencodingbitsRGB8->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); - m_pencodingbitsRGB8->individual.blue1 = (unsigned 
int)m_frgbaColor1.IntBlue(15.0f); - - m_pencodingbitsRGB8->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); - m_pencodingbitsRGB8->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); - m_pencodingbitsRGB8->individual.blue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); - } - - m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; - m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; - - SetEncodingBits_Selectors(); - - m_pencodingbitsRGB8->individual.diff = (unsigned int)m_boolDiff; - m_pencodingbitsRGB8->individual.flip = (unsigned int)m_boolFlip; - - } - - // ---------------------------------------------------------------------------------------------------- - // set the selectors in the encoding bits - // - void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void) - { - - m_pencodingbitsRGB8->individual.selectors = 0; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - unsigned int uiSelector = m_auiSelectors[uiPixel]; - - // set index msb - m_pencodingbitsRGB8->individual.selectors |= (uiSelector >> 1) << (uiPixel ^ 8); - - // set index lsb - m_pencodingbitsRGB8->individual.selectors |= (uiSelector & 1) << ((16 + uiPixel) ^ 8); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the decoded colors and decoded alpha based on the encoding state - // - void Block4x4Encoding_ETC1::Decode(void) - { - - const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0; - - for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++) - { - ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2; - unsigned int uiCW = uiPixelOrder < 8 ? 
m_uiCW1 : m_uiCW2; - - unsigned int uiPixel = pauiPixelOrder[uiPixelOrder]; - - float fDelta = s_aafCwTable[uiCW][m_auiSelectors[uiPixel]]; - m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_ETC1.cpp + +Block4x4Encoding_ETC1 is the encoder to use when targetting file format ETC1. 
This encoder is also +used for the ETC1 subset of file format RGB8, RGBA8 and RGB8A1 + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_ETC1.h" + +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcDifferentialTrys.h" + +#include +#include +#include +#include +#include + +namespace Etc +{ + + // pixel processing order if the flip bit = 0 (horizontal split) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip0[PIXELS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + + // pixel processing order if the flip bit = 1 (vertical split) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderFlip1[PIXELS] = { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }; + + // pixel processing order for horizontal scan (ETC normally does a vertical scan) + const unsigned int Block4x4Encoding_ETC1::s_auiPixelOrderHScan[PIXELS] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + // pixel indices for different block halves + const unsigned int Block4x4Encoding_ETC1::s_auiLeftPixelMapping[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + const unsigned int Block4x4Encoding_ETC1::s_auiRightPixelMapping[8] = { 8, 9, 10, 11, 12, 13, 14, 15 }; + const unsigned int Block4x4Encoding_ETC1::s_auiTopPixelMapping[8] = { 0, 1, 4, 5, 8, 9, 12, 13 }; + const unsigned int Block4x4Encoding_ETC1::s_auiBottomPixelMapping[8] = { 2, 3, 6, 7, 10, 11, 14, 15 }; + + // CW ranges that the ETC1 decoders use + // CW is basically a contrast for the different selector bits, since these values are offsets to the base color + // the first axis in the array is indexed by the CW in the encoding bits + // the second axis in the array is indexed by the selector bits + float Block4x4Encoding_ETC1::s_aafCwTable[CW_RANGES][SELECTORS] = + { + { 2.0f / 255.0f, 8.0f / 255.0f, -2.0f / 255.0f, -8.0f / 255.0f }, + { 5.0f / 255.0f, 17.0f / 255.0f, -5.0f / 255.0f, -17.0f / 255.0f }, + { 9.0f / 255.0f, 29.0f / 255.0f, -9.0f / 255.0f, -29.0f / 255.0f }, + { 13.0f / 
255.0f, 42.0f / 255.0f, -13.0f / 255.0f, -42.0f / 255.0f }, + { 18.0f / 255.0f, 60.0f / 255.0f, -18.0f / 255.0f, -60.0f / 255.0f }, + { 24.0f / 255.0f, 80.0f / 255.0f, -24.0f / 255.0f, -80.0f / 255.0f }, + { 33.0f / 255.0f, 106.0f / 255.0f, -33.0f / 255.0f, -106.0f / 255.0f }, + { 47.0f / 255.0f, 183.0f / 255.0f, -47.0f / 255.0f, -183.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_ETC1::Block4x4Encoding_ETC1(void) + { + InitETC1(); + } + + Block4x4Encoding_ETC1::~Block4x4Encoding_ETC1(void) {} + + void Block4x4Encoding_ETC1::InitETC1() + { + m_mode = MODE_ETC1; + m_boolDiff = false; + m_boolFlip = false; + m_frgbaColor1 = ColorFloatRGBA(); + m_frgbaColor2 = ColorFloatRGBA(); + m_uiCW1 = 0; + m_uiCW2 = 0; + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = 0; + //m_afDecodedAlphas[uiPixel] = 1.0f; + } + + // these aren't initialized + m_frgbaSourceAverageLeft = ColorFloatRGBA(); + m_frgbaSourceAverageRight = ColorFloatRGBA(); + m_frgbaSourceAverageTop = ColorFloatRGBA(); + m_frgbaSourceAverageBottom = ColorFloatRGBA(); + + m_boolMostLikelyFlip = false; + + m_fError = -1.0f; + m_fError1 = -1.0f; + m_fError2 = -1.0f; + m_boolSeverelyBentDifferentialColors = false; + } + + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_ETC1::Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + + // call ctor doesn't work, so call InitETC1 + //Block4x4Encoding_ETC1(); + InitETC1(); + + 
Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, 0); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_ETC1::Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) + { + // this can't hurt + InitETC1(); + + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, iterationCount); + m_fError = -1.0f; + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + m_mode = MODE_ETC1; + m_boolDiff = m_pencodingbitsRGB8->individual.diff; + m_boolFlip = m_pencodingbitsRGB8->individual.flip; + if (m_boolDiff) + { + int iR2 = (int)(m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2); + int iG2 = (int)(m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2); + int iB2 = (int)(m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2); + + if (iR2 < 0) + { + iR2 = 0; + } + else if (iR2 > 31) + { + iR2 = 31; + } + + if (iG2 < 0) + { + iG2 = 0; + } + else if (iG2 > 31) + { + iG2 = 31; + } + + if (iB2 < 0) + { + iB2 = 0; + } + else if (iB2 > 31) + { + iB2 = 31; + } + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, 
(unsigned char)iB2); + + } + else + { + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red1, m_pencodingbitsRGB8->individual.green1, m_pencodingbitsRGB8->individual.blue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(m_pencodingbitsRGB8->individual.red2, m_pencodingbitsRGB8->individual.green2, m_pencodingbitsRGB8->individual.blue2); + } + + m_uiCW1 = m_pencodingbitsRGB8->individual.cw1; + m_uiCW2 = m_pencodingbitsRGB8->individual.cw2; + + InitFromEncodingBits_Selectors(); + + Decode(); + + CalcBlockError(); + } + + // ---------------------------------------------------------------------------------------------------- + // init the selectors from a prior encoding + // + void Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(void) + { + + unsigned char *paucSelectors = (unsigned char *)&m_pencodingbitsRGB8->individual.selectors; + + for (unsigned int iPixel = 0; iPixel < PIXELS; iPixel++) + { + unsigned int uiByteMSB = (unsigned int)(1 - (iPixel / 8)); + unsigned int uiByteLSB = (unsigned int)(3 - (iPixel / 8)); + unsigned int uiShift = (unsigned int)(iPixel & 7); + + unsigned int uiSelectorMSB = (unsigned int)((paucSelectors[uiByteMSB] >> uiShift) & 1); + unsigned int uiSelectorLSB = (unsigned int)((paucSelectors[uiByteLSB] >> uiShift) & 1); + + m_auiSelectors[iPixel] = (uiSelectorMSB << 1) + uiSelectorLSB; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_ETC1::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + PerformFirstIteration(); + break; + + case 1: + TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + 
case 2: + TryIndividual(m_boolMostLikelyFlip, 1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 3: + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 4: + TryIndividual(!m_boolMostLikelyFlip, 1); + if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 5: + TryDegenerates1(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 6: + TryDegenerates2(); + if (a_fEffort <= 89.5f) + { + m_boolDone = true; + } + break; + + case 7: + TryDegenerates3(); + if (a_fEffort <= 99.5f) + { + m_boolDone = true; + } + break; + + case 8: + TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // find best initial encoding to ensure block has a valid encoding + // + void Block4x4Encoding_ETC1::PerformFirstIteration(void) + { + CalculateMostLikelyFlip(); + + m_fError = FLT_MAX; + + TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + + TryIndividual(m_boolMostLikelyFlip, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + TryIndividual(!m_boolMostLikelyFlip, 0); + + } + + // ---------------------------------------------------------------------------------------------------- + // algorithm: + // create a source average color for the Left, Right, Top and Bottom halves using the 8 pixels in each half + // note: the "gray line" is the line of equal delta RGB that goes thru the average color + // for each half: + // see how close each of the 8 pixels are to the "gray line" that goes thru the source average color + // create an error value that is the sum of the distances from the gray 
line + // h_error is the sum of Left and Right errors + // v_error is the sum of Top and Bottom errors + // + void Block4x4Encoding_ETC1::CalculateMostLikelyFlip(void) + { + static const bool DEBUG_PRINT = false; + + CalculateSourceAverages(); + + float fLeftGrayErrorSum = 0.0f; + float fRightGrayErrorSum = 0.0f; + float fTopGrayErrorSum = 0.0f; + float fBottomGrayErrorSum = 0.0f; + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + const ColorFloatRGBA *pfrgbaLeft = &m_pafrgbaSource[uiPixel]; + const ColorFloatRGBA *pfrgbaRight = &m_pafrgbaSource[uiPixel + 8]; + const ColorFloatRGBA *pfrgbaTop = &m_pafrgbaSource[s_auiTopPixelMapping[uiPixel]]; + const ColorFloatRGBA *pfrgbaBottom = &m_pafrgbaSource[s_auiBottomPixelMapping[uiPixel]]; + + float fLeftGrayError = CalcGrayDistance2(*pfrgbaLeft, m_frgbaSourceAverageLeft); + float fRightGrayError = CalcGrayDistance2(*pfrgbaRight, m_frgbaSourceAverageRight); + float fTopGrayError = CalcGrayDistance2(*pfrgbaTop, m_frgbaSourceAverageTop); + float fBottomGrayError = CalcGrayDistance2(*pfrgbaBottom, m_frgbaSourceAverageBottom); + + fLeftGrayErrorSum += fLeftGrayError; + fRightGrayErrorSum += fRightGrayError; + fTopGrayErrorSum += fTopGrayError; + fBottomGrayErrorSum += fBottomGrayError; + } + + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\n%.2f %.2f\n", fLeftGrayErrorSum + fRightGrayErrorSum, fTopGrayErrorSum + fBottomGrayErrorSum); + } + + m_boolMostLikelyFlip = (fTopGrayErrorSum + fBottomGrayErrorSum) < (fLeftGrayErrorSum + fRightGrayErrorSum); + + } + + // ---------------------------------------------------------------------------------------------------- + // calculate source pixel averages for each 2x2 quadrant in a 4x4 block + // these are used to determine the averages for each of the 4 different halves (left, right, top, bottom) + // ignore pixels that have alpha == NAN (these are border pixels outside of the source image) + // weight the averages based on a pixel's alpha + // + void 
Block4x4Encoding_ETC1::CalculateSourceAverages(void) + { + static const bool DEBUG_PRINT = false; + +// bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX; +// +// if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE || boolRGBX) + { + ColorFloatRGBA frgbaSumUL = m_pafrgbaSource[0] + m_pafrgbaSource[1] + m_pafrgbaSource[4] + m_pafrgbaSource[5]; + ColorFloatRGBA frgbaSumLL = m_pafrgbaSource[2] + m_pafrgbaSource[3] + m_pafrgbaSource[6] + m_pafrgbaSource[7]; + ColorFloatRGBA frgbaSumUR = m_pafrgbaSource[8] + m_pafrgbaSource[9] + m_pafrgbaSource[12] + m_pafrgbaSource[13]; + ColorFloatRGBA frgbaSumLR = m_pafrgbaSource[10] + m_pafrgbaSource[11] + m_pafrgbaSource[14] + m_pafrgbaSource[15]; + + // average value of 8 pixels for each of the 4 corners + m_frgbaSourceAverageLeft = (frgbaSumUL + frgbaSumLL) * 0.125f; + m_frgbaSourceAverageRight = (frgbaSumUR + frgbaSumLR) * 0.125f; + m_frgbaSourceAverageTop = (frgbaSumUL + frgbaSumUR) * 0.125f; + m_frgbaSourceAverageBottom = (frgbaSumLL + frgbaSumLR) * 0.125f; + + // * doesn't multiply fA above, it calls ScaleRGB, so a = 8 + m_frgbaSourceAverageLeft.fA = 1.0f; + m_frgbaSourceAverageRight.fA = 1.0f; + m_frgbaSourceAverageTop.fA = 1.0f; + m_frgbaSourceAverageBottom.fA = 1.0f; + } +// else +// { +// float afSourceAlpha[PIXELS]; +// +// // treat alpha NAN as 0.0f +// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) +// { +// afSourceAlpha[uiPixel] = isnan(m_pafrgbaSource[uiPixel].fA) ? 
+// 0.0f : +// m_pafrgbaSource[uiPixel].fA; +// } +// +// ColorFloatRGBA afrgbaAlphaWeightedSource[PIXELS]; +// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) +// { +// afrgbaAlphaWeightedSource[uiPixel] = m_pafrgbaSource[uiPixel] * afSourceAlpha[uiPixel]; +// } +// +// ColorFloatRGBA frgbaSumUL = afrgbaAlphaWeightedSource[0] + +// afrgbaAlphaWeightedSource[1] + +// afrgbaAlphaWeightedSource[4] + +// afrgbaAlphaWeightedSource[5]; +// +// ColorFloatRGBA frgbaSumLL = afrgbaAlphaWeightedSource[2] + +// afrgbaAlphaWeightedSource[3] + +// afrgbaAlphaWeightedSource[6] + +// afrgbaAlphaWeightedSource[7]; +// +// ColorFloatRGBA frgbaSumUR = afrgbaAlphaWeightedSource[8] + +// afrgbaAlphaWeightedSource[9] + +// afrgbaAlphaWeightedSource[12] + +// afrgbaAlphaWeightedSource[13]; +// +// ColorFloatRGBA frgbaSumLR = afrgbaAlphaWeightedSource[10] + +// afrgbaAlphaWeightedSource[11] + +// afrgbaAlphaWeightedSource[14] + +// afrgbaAlphaWeightedSource[15]; +// +// float fWeightSumUL = afSourceAlpha[0] + +// afSourceAlpha[1] + +// afSourceAlpha[4] + +// afSourceAlpha[5]; +// +// float fWeightSumLL = afSourceAlpha[2] + +// afSourceAlpha[3] + +// afSourceAlpha[6] + +// afSourceAlpha[7]; +// +// float fWeightSumUR = afSourceAlpha[8] + +// afSourceAlpha[9] + +// afSourceAlpha[12] + +// afSourceAlpha[13]; +// +// float fWeightSumLR = afSourceAlpha[10] + +// afSourceAlpha[11] + +// afSourceAlpha[14] + +// afSourceAlpha[15]; +// +// ColorFloatRGBA frgbaSumLeft = frgbaSumUL + frgbaSumLL; +// ColorFloatRGBA frgbaSumRight = frgbaSumUR + frgbaSumLR; +// ColorFloatRGBA frgbaSumTop = frgbaSumUL + frgbaSumUR; +// ColorFloatRGBA frgbaSumBottom = frgbaSumLL + frgbaSumLR; +// +// float fWeightSumLeft = fWeightSumUL + fWeightSumLL; +// float fWeightSumRight = fWeightSumUR + fWeightSumLR; +// float fWeightSumTop = fWeightSumUL + fWeightSumUR; +// float fWeightSumBottom = fWeightSumLL + fWeightSumLR; +// +// // check to see if there is at least 1 pixel with non-zero alpha +// // completely 
transparent block should not make it to this code +// assert((fWeightSumLeft + fWeightSumRight) > 0.0f); +// assert((fWeightSumTop + fWeightSumBottom) > 0.0f); +// +// if (fWeightSumLeft > 0.0f) +// { +// m_frgbaSourceAverageLeft = frgbaSumLeft * (1.0f/fWeightSumLeft); +// } +// if (fWeightSumRight > 0.0f) +// { +// m_frgbaSourceAverageRight = frgbaSumRight * (1.0f/fWeightSumRight); +// } +// if (fWeightSumTop > 0.0f) +// { +// m_frgbaSourceAverageTop = frgbaSumTop * (1.0f/fWeightSumTop); +// } +// if (fWeightSumBottom > 0.0f) +// { +// m_frgbaSourceAverageBottom = frgbaSumBottom * (1.0f/fWeightSumBottom); +// } +// +// if (fWeightSumLeft == 0.0f) +// { +// assert(fWeightSumRight > 0.0f); +// m_frgbaSourceAverageLeft = m_frgbaSourceAverageRight; +// } +// if (fWeightSumRight == 0.0f) +// { +// assert(fWeightSumLeft > 0.0f); +// m_frgbaSourceAverageRight = m_frgbaSourceAverageLeft; +// } +// if (fWeightSumTop == 0.0f) +// { +// assert(fWeightSumBottom > 0.0f); +// m_frgbaSourceAverageTop = m_frgbaSourceAverageBottom; +// } +// if (fWeightSumBottom == 0.0f) +// { +// assert(fWeightSumTop > 0.0f); +// m_frgbaSourceAverageBottom = m_frgbaSourceAverageTop; +// } +// } + + + + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\ntarget: [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f] [%.2f,%.2f,%.2f]\n", + m_frgbaSourceAverageLeft.fR, m_frgbaSourceAverageLeft.fG, m_frgbaSourceAverageLeft.fB, + m_frgbaSourceAverageRight.fR, m_frgbaSourceAverageRight.fG, m_frgbaSourceAverageRight.fB, + m_frgbaSourceAverageTop.fR, m_frgbaSourceAverageTop.fG, m_frgbaSourceAverageTop.fB, + m_frgbaSourceAverageBottom.fR, m_frgbaSourceAverageBottom.fG, m_frgbaSourceAverageBottom.fB); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode encoding + // use a_boolFlip to set the encoding F bit + // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] + // use a_iGrayOffset1 and 
a_iGrayOffset2 to offset the basecolor to search for degenerate encodings + // replace the encoding if the encoding error is less than previous encoding + // + void Block4x4Encoding_ETC1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2) + { + + ColorFloatRGBA frgbaColor1; + ColorFloatRGBA frgbaColor2; + + const unsigned int *pauiPixelMapping1; + const unsigned int *pauiPixelMapping2; + + if (a_boolFlip) + { + frgbaColor1 = m_frgbaSourceAverageTop; + frgbaColor2 = m_frgbaSourceAverageBottom; + + pauiPixelMapping1 = s_auiTopPixelMapping; + pauiPixelMapping2 = s_auiBottomPixelMapping; + } + else + { + frgbaColor1 = m_frgbaSourceAverageLeft; + frgbaColor2 = m_frgbaSourceAverageRight; + + pauiPixelMapping1 = s_auiLeftPixelMapping; + pauiPixelMapping2 = s_auiRightPixelMapping; + } + + DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, + a_uiRadius, a_iGrayOffset1, a_iGrayOffset2); + + Block4x4Encoding_ETC1 encodingTry = *this; + encodingTry.m_boolFlip = a_boolFlip; + + encodingTry.TryDifferentialHalf(&trys.m_half1); + encodingTry.TryDifferentialHalf(&trys.m_half2); + + // find best halves that are within differential range + DifferentialTrys::Try *ptryBest1 = nullptr; + DifferentialTrys::Try *ptryBest2 = nullptr; + encodingTry.m_fError = FLT_MAX; + + // see if the best of each half are in differential range + int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed; + int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen; + int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue; + if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3) + { + ptryBest1 = trys.m_half1.m_ptryBest; + ptryBest2 = trys.m_half2.m_ptryBest; + encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; + } + else + { + // else, find the next best halves that are in 
differential range + for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0]; + ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys]; + ptry1++) + { + for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0]; + ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys]; + ptry2++) + { + iDRed = ptry2->m_iRed - ptry1->m_iRed; + bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4; + iDGreen = ptry2->m_iGreen - ptry1->m_iGreen; + bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4; + iDBlue = ptry2->m_iBlue - ptry1->m_iBlue; + bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4; + + if (boolValidRedDelta && boolValidGreenDelta && boolValidBlueDelta) + { + float fError = ptry1->m_fError + ptry2->m_fError; + + if (fError < encodingTry.m_fError) + { + encodingTry.m_fError = fError; + + ptryBest1 = ptry1; + ptryBest2 = ptry2; + } + } + + } + } + assert(encodingTry.m_fError < FLT_MAX); + assert(ptryBest1 != nullptr); + assert(ptryBest2 != nullptr); + } + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; + float 
fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; + + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; + m_fError = m_fError1 + m_fError2; + + // sanity check + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + iDRed = iRed2 - iRed1; + iDGreen = iGreen2 - iGreen1; + iDBlue = iBlue2 - iBlue1; + + assert(iDRed >= -4 && iDRed < 4); + assert(iDGreen >= -4 && iDGreen < 4); + assert(iDBlue >= -4 && iDBlue < 4); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode encoding for a half of a 4x4 block + // vary the basecolor components using a radius + // + void Block4x4Encoding_ETC1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + + int radius = (int)a_phalf->m_uiRadius; + int radiusGB = radius; + + // Only iterate one color on all grayscale, otherwise this picks a red color when + // encoding grayscale images, since it stops on an early iteration of red. + + // TODO: Why is grayscale image stopping on a early red radius iteration? + // Maybe error equal, but this doesn't prefer 0 radius result on equality. + // Can happen if metric isn't gray, so fix this. 
+ + bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); + if (isGray) + { + // drop out green/blue iteration + radiusGB = 0; + } + + for (int iRed = a_phalf->m_iRed - radius; + iRed <= a_phalf->m_iRed + radius; + iRed++) + { + assert(iRed >= 0 && iRed <= 31); + + for (int iGreen = a_phalf->m_iGreen - radiusGB; + iGreen <= a_phalf->m_iGreen + radiusGB; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 31); + + for (int iBlue = a_phalf->m_iBlue - radiusGB; + iBlue <= a_phalf->m_iBlue + radiusGB; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 31); + + DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < &a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); + + if (isGray) + { + ptry->m_iRed = iRed; + ptry->m_iGreen = iRed; + ptry->m_iBlue = iRed; + } + else + { + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + } + + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptry->m_iRed, (unsigned char)ptry->m_iGreen, (unsigned char)ptry->m_iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor + s_aafCwTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; + + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < 
SELECTORS; uiSelector++) + { + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < FLT_MAX); + + a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 individual mode encoding + // use a_boolFlip to set the encoding F bit + // use a_uiRadius to alter basecolor components in the range[-a_uiRadius:a_uiRadius] + // replace the encoding if the encoding error is less than previous encoding + // + void Block4x4Encoding_ETC1::TryIndividual(bool a_boolFlip, unsigned int a_uiRadius) + { + + ColorFloatRGBA frgbaColor1; + ColorFloatRGBA frgbaColor2; + + const unsigned int *pauiPixelMapping1; + const unsigned int *pauiPixelMapping2; + + if (a_boolFlip) + { + frgbaColor1 = m_frgbaSourceAverageTop; + frgbaColor2 = m_frgbaSourceAverageBottom; + + pauiPixelMapping1 = s_auiTopPixelMapping; + pauiPixelMapping2 = s_auiBottomPixelMapping; + } + else + { + frgbaColor1 = m_frgbaSourceAverageLeft; + frgbaColor2 = m_frgbaSourceAverageRight; + + pauiPixelMapping1 = s_auiLeftPixelMapping; + pauiPixelMapping2 = s_auiRightPixelMapping; + } + + IndividualTrys trys(frgbaColor1, 
frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, a_uiRadius); + + Block4x4Encoding_ETC1 encodingTry = *this; + encodingTry.m_boolFlip = a_boolFlip; + + encodingTry.TryIndividualHalf(&trys.m_half1); + encodingTry.TryIndividualHalf(&trys.m_half2); + + // use the best of each half + IndividualTrys::Try *ptryBest1 = trys.m_half1.m_ptryBest; + IndividualTrys::Try *ptryBest2 = trys.m_half2.m_ptryBest; + encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_ETC1; + m_boolDiff = false; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + float fDeltaRGB1 = s_aafCwTable[m_uiCW1][uiSelector1]; + float fDeltaRGB2 = s_aafCwTable[m_uiCW2][uiSelector2]; + + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + m_afrgbaDecodedColors[uiPixel2] = (m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_fError = m_fError1 + m_fError2; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try an ETC1 differential mode 
encoding for a half of a 4x4 block + // vary the basecolor components using a radius + // + void Block4x4Encoding_ETC1::TryIndividualHalf(IndividualTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + + int radius = (int)a_phalf->m_uiRadius; + int radiusGB = radius; + + // only iterate one color on grayscale + // Note: this won't work for color images with gray blocks + bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); + if (isGray) + { + radiusGB = 0; + } + + for (int iRed = a_phalf->m_iRed - radius; + iRed <= a_phalf->m_iRed + radius; + iRed++) + { + assert(iRed >= 0 && iRed <= 15); + + for (int iGreen = a_phalf->m_iGreen - radiusGB; + iGreen <= a_phalf->m_iGreen + radiusGB; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 15); + + for (int iBlue = a_phalf->m_iBlue - radiusGB; + iBlue <= a_phalf->m_iBlue + radiusGB; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 15); + + IndividualTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < &a_phalf->m_atry[IndividualTrys::Half::MAX_TRYS]); + + if (isGray) + { + ptry->m_iRed = iRed; + ptry->m_iGreen = iRed; + ptry->m_iBlue = iRed; + } + else + { + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + } + + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB4((unsigned char)ptry->m_iRed, (unsigned char)ptry->m_iGreen, (unsigned char)ptry->m_iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor 
+ s_aafCwTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = (frgbaColor + s_aafCwTable[uiCW][2]).ClampRGB(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; + //const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[srcPixelIndex]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < FLT_MAX); + + a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 1 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates1(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); + 
TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 2 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates2(void) + { + + TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 3 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates3(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 4 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_ETC1::TryDegenerates4(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); + TryDifferential(m_boolMostLikelyFlip, 
1, 0, 4); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); + + } + + // ---------------------------------------------------------------------------------------------------- + // find the best selector for each pixel based on a particular basecolor and CW that have been previously set + // calculate the selectors for each half of the block separately + // set the block error as the sum of each half's error + // + void Block4x4Encoding_ETC1::CalculateSelectors() + { + if (m_boolFlip) + { + CalculateHalfOfTheSelectors(0, s_auiTopPixelMapping); + CalculateHalfOfTheSelectors(1, s_auiBottomPixelMapping); + } + else + { + CalculateHalfOfTheSelectors(0, s_auiLeftPixelMapping); + CalculateHalfOfTheSelectors(1, s_auiRightPixelMapping); + } + + m_fError = m_fError1 + m_fError2; + } + + // ---------------------------------------------------------------------------------------------------- + // choose best selectors for half of the block + // calculate the error for half of the block + // + void Block4x4Encoding_ETC1::CalculateHalfOfTheSelectors(unsigned int a_uiHalf, + const unsigned int *pauiPixelMapping) + { + static const bool DEBUG_PRINT = false; + + ColorFloatRGBA *pfrgbaColor = a_uiHalf ? &m_frgbaColor2 : &m_frgbaColor1; + unsigned int *puiCW = a_uiHalf ? &m_uiCW2 : &m_uiCW1; + + float *pfHalfError = a_uiHalf ? 
&m_fError2 : &m_fError1; + *pfHalfError = FLT_MAX; + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\ncw=%u\n", uiCW); + } + + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedPixels[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\tsource [%.2f,%.2f,%.2f]\n", m_pafrgbaSource[pauiPixelMapping[uiPixel]].fR, + m_pafrgbaSource[pauiPixelMapping[uiPixel]].fG, m_pafrgbaSource[pauiPixelMapping[uiPixel]].fB); + } + + int srcPixelIndex = pauiPixelMapping[uiPixel]; + + //const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[pauiPixelMapping[uiPixel]]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + float fDeltaRGB = s_aafCwTable[uiCW][uiSelector]; + + frgbaDecodedPixel = (*pfrgbaColor + fDeltaRGB).ClampRGB(); + + float fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); + + bool isBelowError = false; + if (fPixelError < afPixelErrors[uiPixel]) + { + if (DEBUG_PRINT) + { + isBelowError = true; + } + + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedPixels[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\tpixel %u, index %u [%.2f,%.2f,%.2f], error %.2f%s\n", uiPixel, uiSelector, + frgbaDecodedPixel.fR, + frgbaDecodedPixel.fG, + frgbaDecodedPixel.fB, + fPixelError, + isBelowError ? 
" *": ""); + } + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += afPixelErrors[uiPixel]; + } + if (DEBUG_PRINT) + { + KLOGI("EtcComp", "\terror %.2f\n", fCWError); + } + + // if best CW so far + if (fCWError < *pfHalfError) + { + *pfHalfError = fCWError; + *puiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + m_auiSelectors[pauiPixelMapping[uiPixel]] = auiPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[pauiPixelMapping[uiPixel]] = afrgbaDecodedPixels[uiPixel]; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_ETC1::SetEncodingBits(void) + { + assert(m_mode == MODE_ETC1); + + if (m_boolDiff) + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + int iDRed2 = iRed2 - iRed1; + int iDGreen2 = iGreen2 - iGreen1; + int iDBlue2 = iBlue2 - iBlue1; + + assert(iDRed2 >= -4 && iDRed2 < 4); + assert(iDGreen2 >= -4 && iDGreen2 < 4); + assert(iDBlue2 >= -4 && iDBlue2 < 4); + + m_pencodingbitsRGB8->differential.red1 = (unsigned int)iRed1; + m_pencodingbitsRGB8->differential.green1 = (unsigned int)iGreen1; + m_pencodingbitsRGB8->differential.blue1 = (unsigned int)iBlue1; + + m_pencodingbitsRGB8->differential.dred2 = iDRed2; + m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; + m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; + } + else + { + m_pencodingbitsRGB8->individual.red1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + m_pencodingbitsRGB8->individual.green1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + m_pencodingbitsRGB8->individual.blue1 = (unsigned 
int)m_frgbaColor1.IntBlue(15.0f); + + m_pencodingbitsRGB8->individual.red2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + m_pencodingbitsRGB8->individual.green2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + m_pencodingbitsRGB8->individual.blue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + } + + m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; + m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; + + SetEncodingBits_Selectors(); + + m_pencodingbitsRGB8->individual.diff = (unsigned int)m_boolDiff; + m_pencodingbitsRGB8->individual.flip = (unsigned int)m_boolFlip; + + } + + // ---------------------------------------------------------------------------------------------------- + // set the selectors in the encoding bits + // + void Block4x4Encoding_ETC1::SetEncodingBits_Selectors(void) + { + + m_pencodingbitsRGB8->individual.selectors = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiSelector = m_auiSelectors[uiPixel]; + + // set index msb + m_pencodingbitsRGB8->individual.selectors |= (uiSelector >> 1) << (uiPixel ^ 8); + + // set index lsb + m_pencodingbitsRGB8->individual.selectors |= (uiSelector & 1) << ((16 + uiPixel) ^ 8); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the decoded colors and decoded alpha based on the encoding state + // + void Block4x4Encoding_ETC1::Decode(void) + { + + const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++) + { + ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2; + unsigned int uiCW = uiPixelOrder < 8 ? 
m_uiCW1 : m_uiCW2; + + unsigned int uiPixel = pauiPixelOrder[uiPixelOrder]; + + float fDelta = s_aafCwTable[uiCW][m_auiSelectors[uiPixel]]; + m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.h b/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.h index 295d19b0..0ceca140 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_ETC1.h @@ -1,190 +1,190 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcBlock4x4Encoding.h" -#include "EtcBlock4x4EncodingBits.h" -#include "EtcDifferentialTrys.h" -#include "EtcIndividualTrys.h" - -namespace Etc -{ - - // base class for Block4x4Encoding_RGB8 - class Block4x4Encoding_ETC1 : public Block4x4Encoding - { - public: - - Block4x4Encoding_ETC1(void); - virtual ~Block4x4Encoding_ETC1(void); - - virtual void Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - - unsigned char *a_paucEncodingBits, - ErrorMetric a_errormetric) override; - - virtual void Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - - ErrorMetric a_errormetric, - uint16_t iterationCount) override; - - virtual void PerformIteration(float a_fEffort) override; - - virtual void SetEncodingBits(void) override; - - inline bool GetFlip(void) const - { - return m_boolFlip; - } - - inline bool IsDifferential(void) const - { - return m_boolDiff; - } - - inline bool HasSeverelyBentDifferentialColors(void) const - { - return m_boolSeverelyBentDifferentialColors; - } - - //-------------------- - - void Decode(void); - - inline ColorFloatRGBA GetColor1(void) const - { - return m_frgbaColor1; - } - - inline ColorFloatRGBA GetColor2(void) const - { - return m_frgbaColor2; - } - - inline const unsigned int * GetSelectors(void) const - { - return m_auiSelectors; - } - - inline unsigned int GetCW1(void) const - { - return m_uiCW1; - } - - inline unsigned int GetCW2(void) const - { - return m_uiCW2; - } - - protected: // RGBA calls over into this, so it's nearly impossible to make private - - void InitFromEncodingBits_Selectors(void); - - void PerformFirstIteration(void); - void CalculateMostLikelyFlip(void); - - void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2); - void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); - - void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius); - void 
TryIndividualHalf(IndividualTrys::Half *a_phalf); - - void TryDegenerates1(void); - void TryDegenerates2(void); - void TryDegenerates3(void); - void TryDegenerates4(void); - - void CalculateSelectors(); - void CalculateHalfOfTheSelectors(unsigned int a_uiHalf, - const unsigned int *pauiPixelMapping); - - // calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line - inline float CalcGrayDistance2(const ColorFloatRGBA &r_frgbaPixel, - const ColorFloatRGBA &r_frgbaTarget) - { - float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) + - (r_frgbaPixel.fG - r_frgbaTarget.fG) + - (r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f; - - ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB(); - - float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR; - float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG; - float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB; - - return (fDR*fDR) + (fDG*fDG) + (fDB*fDB); - } - - void SetEncodingBits_Selectors(void); - - // intermediate encoding - bool m_boolDiff; - bool m_boolFlip; - ColorFloatRGBA m_frgbaColor1; - ColorFloatRGBA m_frgbaColor2; - unsigned int m_uiCW1; - unsigned int m_uiCW2; - unsigned int m_auiSelectors[PIXELS]; - - // state shared between iterations - ColorFloatRGBA m_frgbaSourceAverageLeft; - ColorFloatRGBA m_frgbaSourceAverageRight; - ColorFloatRGBA m_frgbaSourceAverageTop; - ColorFloatRGBA m_frgbaSourceAverageBottom; - bool m_boolMostLikelyFlip; - - // stats - float m_fError1; // error for Etc1 half 1 - float m_fError2; // error for Etc1 half 2 - bool m_boolSeverelyBentDifferentialColors; // only valid if m_boolDiff; - - // final encoding - Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8; // or RGB8 portion of Block4x4EncodingBits_RGB8A8 - - // bunch of tables and constants - static const unsigned int s_auiPixelOrderFlip0[PIXELS]; - static const unsigned int s_auiPixelOrderFlip1[PIXELS]; - static const unsigned int s_auiPixelOrderHScan[PIXELS]; - - static const unsigned int 
s_auiLeftPixelMapping[8]; - static const unsigned int s_auiRightPixelMapping[8]; - static const unsigned int s_auiTopPixelMapping[8]; - static const unsigned int s_auiBottomPixelMapping[8]; - - static const unsigned int SELECTOR_BITS = 2; - static const unsigned int SELECTORS = 1 << SELECTOR_BITS; - - static const unsigned int CW_BITS = 3; - static const unsigned int CW_RANGES = 1 << CW_BITS; - - static float s_aafCwTable[CW_RANGES][SELECTORS]; - static unsigned char s_aucDifferentialCwRange[256]; - - static const int MAX_DIFFERENTIAL = 3; - static const int MIN_DIFFERENTIAL = -4; - - private: - - void CalculateSourceAverages(void); - void InitETC1(); - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcDifferentialTrys.h" +#include "EtcIndividualTrys.h" + +namespace Etc +{ + + // base class for Block4x4Encoding_RGB8 + class Block4x4Encoding_ETC1 : public Block4x4Encoding + { + public: + + Block4x4Encoding_ETC1(void); + virtual ~Block4x4Encoding_ETC1(void); + + virtual void Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric) override; + + virtual void Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric, + uint16_t iterationCount) override; + + virtual void PerformIteration(float a_fEffort) override; + + virtual void SetEncodingBits(void) override; + + inline bool GetFlip(void) const + { + return m_boolFlip; + } + + inline bool IsDifferential(void) const + { + return m_boolDiff; + } + + inline bool HasSeverelyBentDifferentialColors(void) const + { + return m_boolSeverelyBentDifferentialColors; + } + + //-------------------- + + void Decode(void); + + inline ColorFloatRGBA GetColor1(void) const + { + return m_frgbaColor1; + } + + inline ColorFloatRGBA GetColor2(void) const + { + return m_frgbaColor2; + } + + inline const unsigned int * GetSelectors(void) const + { + return m_auiSelectors; + } + + inline unsigned int GetCW1(void) const + { + return m_uiCW1; + } + + inline unsigned int GetCW2(void) const + { + return m_uiCW2; + } + + protected: // RGBA calls over into this, so it's nearly impossible to make private + + void InitFromEncodingBits_Selectors(void); + + void PerformFirstIteration(void); + void CalculateMostLikelyFlip(void); + + void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); + + void TryIndividual(bool a_boolFlip, unsigned int a_uiRadius); + void 
TryIndividualHalf(IndividualTrys::Half *a_phalf); + + void TryDegenerates1(void); + void TryDegenerates2(void); + void TryDegenerates3(void); + void TryDegenerates4(void); + + void CalculateSelectors(); + void CalculateHalfOfTheSelectors(unsigned int a_uiHalf, + const unsigned int *pauiPixelMapping); + + // calculate the distance2 of r_frgbaPixel from r_frgbaTarget's gray line + inline float CalcGrayDistance2(const ColorFloatRGBA &r_frgbaPixel, + const ColorFloatRGBA &r_frgbaTarget) + { + float fDeltaGray = ((r_frgbaPixel.fR - r_frgbaTarget.fR) + + (r_frgbaPixel.fG - r_frgbaTarget.fG) + + (r_frgbaPixel.fB - r_frgbaTarget.fB)) / 3.0f; + + ColorFloatRGBA frgbaPointOnGrayLine = (r_frgbaTarget + fDeltaGray).ClampRGB(); + + float fDR = r_frgbaPixel.fR - frgbaPointOnGrayLine.fR; + float fDG = r_frgbaPixel.fG - frgbaPointOnGrayLine.fG; + float fDB = r_frgbaPixel.fB - frgbaPointOnGrayLine.fB; + + return (fDR*fDR) + (fDG*fDG) + (fDB*fDB); + } + + void SetEncodingBits_Selectors(void); + + // intermediate encoding + bool m_boolDiff; + bool m_boolFlip; + ColorFloatRGBA m_frgbaColor1; + ColorFloatRGBA m_frgbaColor2; + unsigned int m_uiCW1; + unsigned int m_uiCW2; + unsigned int m_auiSelectors[PIXELS]; + + // state shared between iterations + ColorFloatRGBA m_frgbaSourceAverageLeft; + ColorFloatRGBA m_frgbaSourceAverageRight; + ColorFloatRGBA m_frgbaSourceAverageTop; + ColorFloatRGBA m_frgbaSourceAverageBottom; + bool m_boolMostLikelyFlip; + + // stats + float m_fError1; // error for Etc1 half 1 + float m_fError2; // error for Etc1 half 2 + bool m_boolSeverelyBentDifferentialColors; // only valid if m_boolDiff; + + // final encoding + Block4x4EncodingBits_RGB8 *m_pencodingbitsRGB8; // or RGB8 portion of Block4x4EncodingBits_RGB8A8 + + // bunch of tables and constants + static const unsigned int s_auiPixelOrderFlip0[PIXELS]; + static const unsigned int s_auiPixelOrderFlip1[PIXELS]; + static const unsigned int s_auiPixelOrderHScan[PIXELS]; + + static const unsigned int 
s_auiLeftPixelMapping[8]; + static const unsigned int s_auiRightPixelMapping[8]; + static const unsigned int s_auiTopPixelMapping[8]; + static const unsigned int s_auiBottomPixelMapping[8]; + + static const unsigned int SELECTOR_BITS = 2; + static const unsigned int SELECTORS = 1 << SELECTOR_BITS; + + static const unsigned int CW_BITS = 3; + static const unsigned int CW_RANGES = 1 << CW_BITS; + + static float s_aafCwTable[CW_RANGES][SELECTORS]; + static unsigned char s_aucDifferentialCwRange[256]; + + static const int MAX_DIFFERENTIAL = 3; + static const int MIN_DIFFERENTIAL = -4; + + private: + + void CalculateSourceAverages(void); + void InitETC1(); + }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_R11.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_R11.cpp index cb37505c..957967ba 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_R11.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_R11.cpp @@ -1,544 +1,544 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_R11.cpp - -Block4x4Encoding_R11 is the encoder to use when targetting file format R11 and SR11 (signed R11). - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_R11.h" - -#include "EtcBlock4x4EncodingBits.h" -//#include "EtcBlock4x4.h" - -#include -#include -#include -#include -#include -//#include - -namespace Etc -{ - template - T clamp(T value, T mn, T mx) { - return (value <= mn) ? 
mn : ((value >= mx) ? mx : value); - } - - const int MODIFIER_TABLE_ENTRYS = 16; - const int SELECTOR_BITS = 3; - const int SELECTORS = 1 << SELECTOR_BITS; - - // modifier values to use for R11, SR11, RG11 and SRG11 - const int8_t s_modifierTable8[MODIFIER_TABLE_ENTRYS][SELECTORS] - { - { -3, -6, -9, -15, 2, 5, 8, 14 }, - { -3, -7, -10, -13, 2, 6, 9, 12 }, - { -2, -5, -8, -13, 1, 4, 7, 12 }, - { -2, -4, -6, -13, 1, 3, 5, 12 }, - - { -3, -6, -8, -12, 2, 5, 7, 11 }, - { -3, -7, -9, -11, 2, 6, 8, 10 }, - { -4, -7, -8, -11, 3, 6, 7, 10 }, - { -3, -5, -8, -11, 2, 4, 7, 10 }, - - { -2, -6, -8, -10, 1, 5, 7, 9 }, - { -2, -5, -8, -10, 1, 4, 7, 9 }, - { -2, -4, -8, -10, 1, 3, 7, 9 }, - { -2, -5, -7, -10, 1, 4, 6, 9 }, - - { -3, -4, -7, -10, 2, 3, 6, 9 }, - { -1, -2, -3, -10, 0, 1, 2, 9 }, - { -4, -6, -8, -9, 3, 5, 7, 8 }, - { -3, -5, -7, -9, 2, 4, 6, 8 } - }; - - // this is simplified for interation - // stripped down, since it's one of the hotspots of encoding - inline int DecodePixelRedInt(int baseMul8Plus4, int multiplier, int modifier) - { - int pixel = baseMul8Plus4 + modifier * multiplier; - - // see here - // https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.html - -// if (multiplier > 0) -// { -// //fPixel = (a_fBase * 8 + 4) + 8 * fModifier * a_fMultiplier; -// pixel = baseMul8Plus4 + 8 * modifier * multiplier; -// } -// else -// { -// //fPixel = (a_fBase * 8 + 4) + fModifier; -// pixel = baseMul8Plus4 + modifier; -// } - - // just to debug over range pixels -// if (pixel < 0 || pixel > 2047) -// { -// int bp = 0; -// bp = bp; -// } - - // modifier and multiplier can push base outside valid range, but hw clamps - pixel = clamp(pixel, 0, 2047); - return pixel; - } - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding_R11::Block4x4Encoding_R11(void) - { - m_pencodingbitsR11 = nullptr; - } - - Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {} - - // 
---------------------------------------------------------------------------------------------------- - void Block4x4Encoding_R11::Encode( - const float *sourcePixels, - uint8_t *encodingBits, - bool isSnorm - ) - { - int numSourceChannels = 4; // advance by 4 floats - - int fMinRed = 2047; - int fMaxRed = 0; - - // assumption of unorm float data for sourcePixels here - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int fRed = clamp((int)roundf(2047.0f * sourcePixels[numSourceChannels * uiPixel]), 0, 2047); - - if (fRed < fMinRed) - { - fMinRed = fRed; - } - if (fRed > fMaxRed) - { - fMaxRed = fRed; - } - - m_srcPixels[uiPixel] = fRed; - } - - m_redMin = fMinRed; - m_redMax = fMaxRed; - - // now setup for iteration - m_uiEncodingIterations = 0; - m_fError = FLT_MAX; - m_isDone = false; - m_isSnorm = isSnorm; - - m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; - } - - // ---------------------------------------------------------------------------------------------------- - void Block4x4Encoding_R11::Decode( - uint8_t *encodingBits, - const float *sourcePixels, - bool isSnorm, - uint16_t lastIterationCount - ) - { - m_isDone = (lastIterationCount & 0x80) != 0; // done high bit - - if (m_isDone) - { - m_pencodingbitsR11 = nullptr; // skip decode/encode on partially done block - m_fError = 0.0f; - return; - } - - m_uiEncodingIterations = lastIterationCount; - - // everything is re-established from the encoded block and iteration count - // since we already have to allocate the block storage, an iteration count per block is only additional - // also encoders are now across all blocks, so could just allocate one block per thread and iterate until - // done and skip the priority system. - // - // Note: don't call this on done blocks and then iterate, or iteration count will advance - // m_isDone is set to false in the Encode. Priority queue should ignore done blocks already. 
- - m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; - m_isSnorm = isSnorm; - - if (m_isSnorm) - { - m_redBase = (int8_t)m_pencodingbitsR11->data.base + 128; - } - else - { - m_redBase = (uint8_t)m_pencodingbitsR11->data.base; - } - - m_redMultiplier = m_pencodingbitsR11->data.multiplier; - m_redModifierTableIndex = m_pencodingbitsR11->data.table; - - uint64_t selectorBits = 0; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors0 << (uint64_t)40; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors1 << (uint64_t)32; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors2 << (uint64_t)24; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors3 << (uint64_t)16; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors4 << (uint64_t)8; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors5; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - uint64_t uiShift = 45 - (3 * uiPixel); - m_redSelectors[uiPixel] = (selectorBits >> uiShift) & (uint64_t)(SELECTORS - 1); - } - - // call this to continue encoding later iterations - Encode(sourcePixels, encodingBits, isSnorm); - - // recompute the block error by decoding each pixel - // could save out error to SortedBlock avoid needing to compute all this - // but would need to store r and g error separately. - int blockError = 0; - - int baseForDecode = m_redBase * 8 + 4; - int multiplierForDecode = (m_redMultiplier == 0) ? 
1 : (8 * m_redMultiplier); - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int modifier = s_modifierTable8[m_redModifierTableIndex][m_redSelectors[uiPixel]]; - - int decodedPixelData = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); - - // add up the error - int error = decodedPixelData - m_srcPixels[uiPixel]; - blockError += error * error; - } - - m_fError = (float)blockError; - } - - void Block4x4Encoding_R11::DecodeOnly( - const uint8_t *encodingBits, - float *decodedPixels, - bool isSnorm) - { - m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; - m_isSnorm = isSnorm; - - if (m_isSnorm) - { - m_redBase = (int8_t)m_pencodingbitsR11->data.base + 128; - } - else - { - m_redBase = (uint8_t)m_pencodingbitsR11->data.base; - } - - m_redMultiplier = m_pencodingbitsR11->data.multiplier; - m_redModifierTableIndex = m_pencodingbitsR11->data.table; - - uint64_t selectorBits = 0; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors0 << (uint64_t)40; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors1 << (uint64_t)32; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors2 << (uint64_t)24; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors3 << (uint64_t)16; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors4 << (uint64_t)8; - selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors5; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - uint64_t uiShift = 45 - (3 * uiPixel); - m_redSelectors[uiPixel] = (selectorBits >> uiShift) & (uint64_t)(SELECTORS - 1); - } - - // now extract the pixels from the block values above - int numChannels = 4; - - int baseForDecode = m_redBase * 8 + 4; - int multiplierForDecode = (m_redMultiplier == 0) ? 
1 : (8 * m_redMultiplier); - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int modifier = s_modifierTable8[m_redModifierTableIndex][m_redSelectors[uiPixel]]; - - int decodedPixelData = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); - - decodedPixels[uiPixel * numChannels] = decodedPixelData / 2047.0f; - } - } - - // ---------------------------------------------------------------------------------------------------- - - // 16 pixels x 1 unit squared out of 2047 - const float kErrorTolerance = 16 * 1 * 1; - - void Block4x4Encoding_R11::PerformIteration(float a_fEffort) - { - if (m_pencodingbitsR11 == nullptr) - { - return; - } - - if (m_isDone) - { - return; - } - - switch (m_uiEncodingIterations) - { - case 0: - CalculateR11(8, 0, 0); - break; - - case 1: - CalculateR11(8, 2, 1); - if (a_fEffort <= 24.5f) // TODO: decouple effort from this, this is more of an iteration quality - { - m_isDone = true; - } - break; - - case 2: - CalculateR11(8, 12, 1); - if (a_fEffort <= 49.5f) - { - m_isDone = true; - } - break; - - case 3: - CalculateR11(7, 6, 1); - break; - - case 4: - CalculateR11(6, 3, 1); - break; - - case 5: - CalculateR11(5, 1, 0); - m_isDone = true; - break; - } - - // advance to next iteration - if (!m_isDone) - { - if (m_fError < kErrorTolerance) - { - m_isDone = true; - } - else - { - m_uiEncodingIterations++; - } - } - } - - // ---------------------------------------------------------------------------------------------------- - - // find the best combination of base color, multiplier and selectors - void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, - int a_fBaseRadius, int a_fMultiplierRadius) - { - // maps from virtual (monotonic) selector to ETC selector - static const uint8_t auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7}; - - // don't search any extra radius if range is 0 - // TODO: there's probably an instant "done" case here without any iteration - int fRedRange = (m_redMax - m_redMin); - - 
if (fRedRange == 0) - { - a_fBaseRadius = 0; - a_fMultiplierRadius = 0; - } - - // 16 x 8 x 3 x 16 x 16 x 8 iterations = 786K iteraatins / block worst case - - // try each modifier table entry - // 16 of these - for (int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++) - { - // up to 8 of these - for (int uiMinVirtualSelector = 0; - uiMinVirtualSelector <= (int)(8 - a_uiSelectorsUsed); - uiMinVirtualSelector++) - { - int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1; - - int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector]; - int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector]; - - int fTableEntryCenter = -s_modifierTable8[uiTableEntry][uiMinSelector]; - - int fTableEntryRange = s_modifierTable8[uiTableEntry][uiMaxSelector] - - s_modifierTable8[uiTableEntry][uiMinSelector]; - - float fCenterRatio = fTableEntryCenter / (float)fTableEntryRange; - float fCenter = m_redMin + fCenterRatio * fRedRange; - int fCenterInt = (int)roundf((255.0f/2047.0f) * fCenter); - - // base of 0 to 255 maps to 0 to 2047 - // search a radius of values off center of range - int fMinBase = fCenterInt - a_fBaseRadius; - int fMaxBase = fCenterInt + a_fBaseRadius; - if (fMinBase < 0) - { - fMinBase = 0; - } - if (fMaxBase > 255) - { - fMaxBase = 255; - } - - // 255 / up to 29 - int fRangeMultiplier = (int)roundf((fRedRange * (255.0 / 2047.0f)) / fTableEntryRange); - - int fMinMultiplier = clamp(fRangeMultiplier - a_fMultiplierRadius, 0, 15); // yes, 0 - int fMaxMultiplier = clamp(fRangeMultiplier + a_fMultiplierRadius, 1, 15); - - // find best selector for each pixel - uint8_t bestSelectors[PIXELS]; - int bestRedError[PIXELS]; - - // only for debug - //int bestPixelRed[PIXELS]; - - // up to 3 of these - for (int fBase = fMinBase; fBase <= fMaxBase; fBase++) - { - int baseForDecode = fBase * 8 + 4; - - // up to 16 of these - for (int fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier++) - { - int 
multiplierForDecode = (fMultiplier == 0) ? 1 : (8 * fMultiplier); - - // 16 of these - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int bestPixelError = 2047 * 2047; - - // 8 of these - for (int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - int modifier = s_modifierTable8[uiTableEntry][uiSelector]; - - int fPixelRed = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); - - int error = fPixelRed - (int)m_srcPixels[uiPixel]; - error *= error; - - // this is guaranteed to pick one selector for every pixel - // the one with the lowest error. - if (error < bestPixelError) - { - bestPixelError = error; - bestRedError[uiPixel] = error; - bestSelectors[uiPixel] = uiSelector; - - //bestPixelRed[uiPixel] = fPixelRed; - } - } - } - - // accumulate all best pixel error into block error total - int blockError = 0; - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - blockError += bestRedError[uiPixel]; - } - - // pick that if it's the smallest error - if (m_fError > (float)blockError) - { - m_fError = (float)blockError; - - if (m_isSnorm) - { - m_redBase = fBase - 128; - } - else - { - m_redBase = fBase; - } - m_redMultiplier = fMultiplier; - m_redModifierTableIndex = uiTableEntry; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_redSelectors[uiPixel] = bestSelectors[uiPixel]; - - // nothing looks at this data, but useful to compare to source - //m_decodedPixels[uiPixel] = bestPixelRed[uiPixel]; // / 2047.0f; - } - - - // compare to tolerance, since reaching 0 is difficult in float - if (m_fError <= kErrorTolerance) - { - return; - } - } - } - } - - } - } - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state - // - void Block4x4Encoding_R11::SetEncodingBits(void) - { - // skip encode if block is already done - if (m_pencodingbitsR11 == nullptr) - { - return; - } - - if (m_isSnorm) - { - 
m_pencodingbitsR11->data.base = (int8_t)m_redBase; - } - else - { - m_pencodingbitsR11->data.base = (uint8_t)m_redBase; - } - m_pencodingbitsR11->data.table = m_redModifierTableIndex; - m_pencodingbitsR11->data.multiplier = m_redMultiplier; - - uint64_t selectorBits = 0; - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - uint64_t uiShift = 45 - (3 * uiPixel); - selectorBits |= ((uint64_t)m_redSelectors[uiPixel]) << uiShift; - } - - m_pencodingbitsR11->data.selectors0 = uint32_t(selectorBits >> (uint64_t)40); - m_pencodingbitsR11->data.selectors1 = uint32_t(selectorBits >> (uint64_t)32); - m_pencodingbitsR11->data.selectors2 = uint32_t(selectorBits >> (uint64_t)24); - m_pencodingbitsR11->data.selectors3 = uint32_t(selectorBits >> (uint64_t)16); - m_pencodingbitsR11->data.selectors4 = uint32_t(selectorBits >> (uint64_t)8); - m_pencodingbitsR11->data.selectors5 = uint32_t(selectorBits); - } - - // ---------------------------------------------------------------------------------------------------- - // -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_R11.cpp + +Block4x4Encoding_R11 is the encoder to use when targetting file format R11 and SR11 (signed R11). 
+ +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_R11.h" + +#include "EtcBlock4x4EncodingBits.h" +//#include "EtcBlock4x4.h" + +#include +#include +#include +#include +#include +//#include + +namespace Etc +{ + template + T clamp(T value, T mn, T mx) { + return (value <= mn) ? mn : ((value >= mx) ? mx : value); + } + + const int MODIFIER_TABLE_ENTRYS = 16; + const int SELECTOR_BITS = 3; + const int SELECTORS = 1 << SELECTOR_BITS; + + // modifier values to use for R11, SR11, RG11 and SRG11 + const int8_t s_modifierTable8[MODIFIER_TABLE_ENTRYS][SELECTORS] + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, + { -3, -7, -10, -13, 2, 6, 9, 12 }, + { -2, -5, -8, -13, 1, 4, 7, 12 }, + { -2, -4, -6, -13, 1, 3, 5, 12 }, + + { -3, -6, -8, -12, 2, 5, 7, 11 }, + { -3, -7, -9, -11, 2, 6, 8, 10 }, + { -4, -7, -8, -11, 3, 6, 7, 10 }, + { -3, -5, -8, -11, 2, 4, 7, 10 }, + + { -2, -6, -8, -10, 1, 5, 7, 9 }, + { -2, -5, -8, -10, 1, 4, 7, 9 }, + { -2, -4, -8, -10, 1, 3, 7, 9 }, + { -2, -5, -7, -10, 1, 4, 6, 9 }, + + { -3, -4, -7, -10, 2, 3, 6, 9 }, + { -1, -2, -3, -10, 0, 1, 2, 9 }, + { -4, -6, -8, -9, 3, 5, 7, 8 }, + { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + // this is simplified for interation + // stripped down, since it's one of the hotspots of encoding + inline int DecodePixelRedInt(int baseMul8Plus4, int multiplier, int modifier) + { + int pixel = baseMul8Plus4 + modifier * multiplier; + + // see here + // https://www.khronos.org/registry/DataFormat/specs/1.1/dataformat.1.1.html + +// if (multiplier > 0) +// { +// //fPixel = (a_fBase * 8 + 4) + 8 * fModifier * a_fMultiplier; +// pixel = baseMul8Plus4 + 8 * modifier * multiplier; +// } +// else +// { +// //fPixel = (a_fBase * 8 + 4) + fModifier; +// pixel = baseMul8Plus4 + modifier; +// } + + // just to debug over range pixels +// if (pixel < 0 || pixel > 2047) +// { +// int bp = 0; +// bp = bp; +// } + + // modifier and multiplier can push base outside valid range, but hw clamps + pixel = clamp(pixel, 0, 2047); + return pixel; + 
} + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_R11::Block4x4Encoding_R11(void) + { + m_pencodingbitsR11 = nullptr; + } + + Block4x4Encoding_R11::~Block4x4Encoding_R11(void) {} + + // ---------------------------------------------------------------------------------------------------- + void Block4x4Encoding_R11::Encode( + const float *sourcePixels, + uint8_t *encodingBits, + bool isSnorm + ) + { + int numSourceChannels = 4; // advance by 4 floats + + int fMinRed = 2047; + int fMaxRed = 0; + + // assumption of unorm float data for sourcePixels here + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int fRed = clamp((int)roundf(2047.0f * sourcePixels[numSourceChannels * uiPixel]), 0, 2047); + + if (fRed < fMinRed) + { + fMinRed = fRed; + } + if (fRed > fMaxRed) + { + fMaxRed = fRed; + } + + m_srcPixels[uiPixel] = fRed; + } + + m_redMin = fMinRed; + m_redMax = fMaxRed; + + // now setup for iteration + m_uiEncodingIterations = 0; + m_fError = FLT_MAX; + m_isDone = false; + m_isSnorm = isSnorm; + + m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; + } + + // ---------------------------------------------------------------------------------------------------- + void Block4x4Encoding_R11::Decode( + uint8_t *encodingBits, + const float *sourcePixels, + bool isSnorm, + uint16_t lastIterationCount + ) + { + m_isDone = (lastIterationCount & 0x80) != 0; // done high bit + + if (m_isDone) + { + m_pencodingbitsR11 = nullptr; // skip decode/encode on partially done block + m_fError = 0.0f; + return; + } + + m_uiEncodingIterations = lastIterationCount; + + // everything is re-established from the encoded block and iteration count + // since we already have to allocate the block storage, an iteration count per block is only additional + // also encoders are now across all blocks, so could just allocate one block per thread and iterate until + // done and skip the priority 
system. + // + // Note: don't call this on done blocks and then iterate, or iteration count will advance + // m_isDone is set to false in the Encode. Priority queue should ignore done blocks already. + + m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; + m_isSnorm = isSnorm; + + if (m_isSnorm) + { + m_redBase = (int8_t)m_pencodingbitsR11->data.base + 128; + } + else + { + m_redBase = (uint8_t)m_pencodingbitsR11->data.base; + } + + m_redMultiplier = m_pencodingbitsR11->data.multiplier; + m_redModifierTableIndex = m_pencodingbitsR11->data.table; + + uint64_t selectorBits = 0; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors0 << (uint64_t)40; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors1 << (uint64_t)32; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors2 << (uint64_t)24; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors3 << (uint64_t)16; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors4 << (uint64_t)8; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors5; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + uint64_t uiShift = 45 - (3 * uiPixel); + m_redSelectors[uiPixel] = (selectorBits >> uiShift) & (uint64_t)(SELECTORS - 1); + } + + // call this to continue encoding later iterations + Encode(sourcePixels, encodingBits, isSnorm); + + // recompute the block error by decoding each pixel + // could save out error to SortedBlock avoid needing to compute all this + // but would need to store r and g error separately. + int blockError = 0; + + int baseForDecode = m_redBase * 8 + 4; + int multiplierForDecode = (m_redMultiplier == 0) ? 
1 : (8 * m_redMultiplier); + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int modifier = s_modifierTable8[m_redModifierTableIndex][m_redSelectors[uiPixel]]; + + int decodedPixelData = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); + + // add up the error + int error = decodedPixelData - m_srcPixels[uiPixel]; + blockError += error * error; + } + + m_fError = (float)blockError; + } + + void Block4x4Encoding_R11::DecodeOnly( + const uint8_t *encodingBits, + float *decodedPixels, + bool isSnorm) + { + m_pencodingbitsR11 = (Block4x4EncodingBits_R11 *)encodingBits; + m_isSnorm = isSnorm; + + if (m_isSnorm) + { + m_redBase = (int8_t)m_pencodingbitsR11->data.base + 128; + } + else + { + m_redBase = (uint8_t)m_pencodingbitsR11->data.base; + } + + m_redMultiplier = m_pencodingbitsR11->data.multiplier; + m_redModifierTableIndex = m_pencodingbitsR11->data.table; + + uint64_t selectorBits = 0; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors0 << (uint64_t)40; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors1 << (uint64_t)32; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors2 << (uint64_t)24; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors3 << (uint64_t)16; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors4 << (uint64_t)8; + selectorBits |= (uint64_t)m_pencodingbitsR11->data.selectors5; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + uint64_t uiShift = 45 - (3 * uiPixel); + m_redSelectors[uiPixel] = (selectorBits >> uiShift) & (uint64_t)(SELECTORS - 1); + } + + // now extract the pixels from the block values above + int numChannels = 4; + + int baseForDecode = m_redBase * 8 + 4; + int multiplierForDecode = (m_redMultiplier == 0) ? 
1 : (8 * m_redMultiplier); + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int modifier = s_modifierTable8[m_redModifierTableIndex][m_redSelectors[uiPixel]]; + + int decodedPixelData = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); + + decodedPixels[uiPixel * numChannels] = decodedPixelData / 2047.0f; + } + } + + // ---------------------------------------------------------------------------------------------------- + + // 16 pixels x 1 unit squared out of 2047 + const float kErrorTolerance = 16 * 1 * 1; + + void Block4x4Encoding_R11::PerformIteration(float a_fEffort) + { + if (m_pencodingbitsR11 == nullptr) + { + return; + } + + if (m_isDone) + { + return; + } + + switch (m_uiEncodingIterations) + { + case 0: + CalculateR11(8, 0, 0); + break; + + case 1: + CalculateR11(8, 2, 1); + if (a_fEffort <= 24.5f) // TODO: decouple effort from this, this is more of an iteration quality + { + m_isDone = true; + } + break; + + case 2: + CalculateR11(8, 12, 1); + if (a_fEffort <= 49.5f) + { + m_isDone = true; + } + break; + + case 3: + CalculateR11(7, 6, 1); + break; + + case 4: + CalculateR11(6, 3, 1); + break; + + case 5: + CalculateR11(5, 1, 0); + m_isDone = true; + break; + } + + // advance to next iteration + if (!m_isDone) + { + if (m_fError < kErrorTolerance) + { + m_isDone = true; + } + else + { + m_uiEncodingIterations++; + } + } + } + + // ---------------------------------------------------------------------------------------------------- + + // find the best combination of base color, multiplier and selectors + void Block4x4Encoding_R11::CalculateR11(unsigned int a_uiSelectorsUsed, + int a_fBaseRadius, int a_fMultiplierRadius) + { + // maps from virtual (monotonic) selector to ETC selector + static const uint8_t auiVirtualSelectorMap[8] = {3, 2, 1, 0, 4, 5, 6, 7}; + + // don't search any extra radius if range is 0 + // TODO: there's probably an instant "done" case here without any iteration + int fRedRange = (m_redMax - m_redMin); + + 
if (fRedRange == 0) + { + a_fBaseRadius = 0; + a_fMultiplierRadius = 0; + } + + // 16 x 8 x 3 x 16 x 16 x 8 iterations = 786K iteraatins / block worst case + + // try each modifier table entry + // 16 of these + for (int uiTableEntry = 0; uiTableEntry < MODIFIER_TABLE_ENTRYS; uiTableEntry++) + { + // up to 8 of these + for (int uiMinVirtualSelector = 0; + uiMinVirtualSelector <= (int)(8 - a_uiSelectorsUsed); + uiMinVirtualSelector++) + { + int uiMaxVirtualSelector = uiMinVirtualSelector + a_uiSelectorsUsed - 1; + + int uiMinSelector = auiVirtualSelectorMap[uiMinVirtualSelector]; + int uiMaxSelector = auiVirtualSelectorMap[uiMaxVirtualSelector]; + + int fTableEntryCenter = -s_modifierTable8[uiTableEntry][uiMinSelector]; + + int fTableEntryRange = s_modifierTable8[uiTableEntry][uiMaxSelector] - + s_modifierTable8[uiTableEntry][uiMinSelector]; + + float fCenterRatio = fTableEntryCenter / (float)fTableEntryRange; + float fCenter = m_redMin + fCenterRatio * fRedRange; + int fCenterInt = (int)roundf((255.0f/2047.0f) * fCenter); + + // base of 0 to 255 maps to 0 to 2047 + // search a radius of values off center of range + int fMinBase = fCenterInt - a_fBaseRadius; + int fMaxBase = fCenterInt + a_fBaseRadius; + if (fMinBase < 0) + { + fMinBase = 0; + } + if (fMaxBase > 255) + { + fMaxBase = 255; + } + + // 255 / up to 29 + int fRangeMultiplier = (int)roundf((fRedRange * (255.0 / 2047.0f)) / fTableEntryRange); + + int fMinMultiplier = clamp(fRangeMultiplier - a_fMultiplierRadius, 0, 15); // yes, 0 + int fMaxMultiplier = clamp(fRangeMultiplier + a_fMultiplierRadius, 1, 15); + + // find best selector for each pixel + uint8_t bestSelectors[PIXELS]; + int bestRedError[PIXELS]; + + // only for debug + //int bestPixelRed[PIXELS]; + + // up to 3 of these + for (int fBase = fMinBase; fBase <= fMaxBase; fBase++) + { + int baseForDecode = fBase * 8 + 4; + + // up to 16 of these + for (int fMultiplier = fMinMultiplier; fMultiplier <= fMaxMultiplier; fMultiplier++) + { + int 
multiplierForDecode = (fMultiplier == 0) ? 1 : (8 * fMultiplier); + + // 16 of these + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int bestPixelError = 2047 * 2047; + + // 8 of these + for (int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + int modifier = s_modifierTable8[uiTableEntry][uiSelector]; + + int fPixelRed = DecodePixelRedInt(baseForDecode, multiplierForDecode, modifier); + + int error = fPixelRed - (int)m_srcPixels[uiPixel]; + error *= error; + + // this is guaranteed to pick one selector for every pixel + // the one with the lowest error. + if (error < bestPixelError) + { + bestPixelError = error; + bestRedError[uiPixel] = error; + bestSelectors[uiPixel] = uiSelector; + + //bestPixelRed[uiPixel] = fPixelRed; + } + } + } + + // accumulate all best pixel error into block error total + int blockError = 0; + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + blockError += bestRedError[uiPixel]; + } + + // pick that if it's the smallest error + if (m_fError > (float)blockError) + { + m_fError = (float)blockError; + + if (m_isSnorm) + { + m_redBase = fBase - 128; + } + else + { + m_redBase = fBase; + } + m_redMultiplier = fMultiplier; + m_redModifierTableIndex = uiTableEntry; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_redSelectors[uiPixel] = bestSelectors[uiPixel]; + + // nothing looks at this data, but useful to compare to source + //m_decodedPixels[uiPixel] = bestPixelRed[uiPixel]; // / 2047.0f; + } + + + // compare to tolerance, since reaching 0 is difficult in float + if (m_fError <= kErrorTolerance) + { + return; + } + } + } + } + + } + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_R11::SetEncodingBits(void) + { + // skip encode if block is already done + if (m_pencodingbitsR11 == nullptr) + { + return; + } + + if (m_isSnorm) + { + 
m_pencodingbitsR11->data.base = (int8_t)m_redBase; + } + else + { + m_pencodingbitsR11->data.base = (uint8_t)m_redBase; + } + m_pencodingbitsR11->data.table = m_redModifierTableIndex; + m_pencodingbitsR11->data.multiplier = m_redMultiplier; + + uint64_t selectorBits = 0; + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + uint64_t uiShift = 45 - (3 * uiPixel); + selectorBits |= ((uint64_t)m_redSelectors[uiPixel]) << uiShift; + } + + m_pencodingbitsR11->data.selectors0 = uint32_t(selectorBits >> (uint64_t)40); + m_pencodingbitsR11->data.selectors1 = uint32_t(selectorBits >> (uint64_t)32); + m_pencodingbitsR11->data.selectors2 = uint32_t(selectorBits >> (uint64_t)24); + m_pencodingbitsR11->data.selectors3 = uint32_t(selectorBits >> (uint64_t)16); + m_pencodingbitsR11->data.selectors4 = uint32_t(selectorBits >> (uint64_t)8); + m_pencodingbitsR11->data.selectors5 = uint32_t(selectorBits); + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_R11.h b/libkram/etc2comp/EtcBlock4x4Encoding_R11.h index 5c175d9e..31c1a21c 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_R11.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_R11.h @@ -1,136 +1,136 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -//#include "EtcBlock4x4Encoding_RGB8.h" - -namespace Etc -{ - class Block4x4EncodingBits_R11; - - // ################################################################################ - // Block4x4Encoding_R11 - // ################################################################################ - - // Simpler interface for R11 and RG11 without all the code/data from Block4x4. - class IBlockEncoding - { - public: - virtual ~IBlockEncoding() {} - - // setup block for encoding iteration, isDone() true when finished - virtual void Encode( - const float *sourcePixels,uint8_t *encodingBits, bool isSnorm) = 0; - - // this is for decoding a block in multipass - virtual void Decode( - uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, - uint16_t lastIterationCount - ) = 0; - - // for decoding a block for display or conversion - virtual void DecodeOnly( - const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) = 0; - - // iterate to reduce the error - virtual void PerformIteration(float a_fEffort) = 0; - - // write out block - virtual void SetEncodingBits(void) = 0; - - // when error is zero, or effort level also limits iteration - virtual bool IsDone() const = 0; - - virtual uint16_t GetIterationCount() const = 0; - - virtual float GetError() const = 0; - }; - - class Block4x4Encoding_R11 : public IBlockEncoding - { - public: - - Block4x4Encoding_R11(void); - virtual ~Block4x4Encoding_R11(void); - - // setup block for encoding iteration, isDone() true when finished - virtual void Encode( - const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) override; - - // this is for decoding a block in multipass - virtual void Decode( - uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, - uint16_t lastIterationCount) override; - - // for decoding a block for display or conversion - virtual void DecodeOnly( - const uint8_t *encodingBits, - float *decodedPixels, - bool isSnorm) override; - - virtual void 
PerformIteration(float a_fEffort) override; - - virtual void SetEncodingBits(void) override; - - virtual bool IsDone() const override { return m_isDone; } - - // done bit embedded into high bit of each 8-bit count - virtual uint16_t GetIterationCount() const override - { - uint16_t count = m_uiEncodingIterations; - if (m_isDone) - { - count |= 0x80; // done high bit - } - return count; - } - - virtual float GetError() const override { return m_fError; } - - private: - void CalculateR11(unsigned int a_uiSelectorsUsed, - int a_fBaseRadius, int a_fMultiplierRadius); - - Block4x4EncodingBits_R11 *m_pencodingbitsR11; - - //float m_fRedBlockError; - - static const int PIXELS = 16; // 4 * 4 - - // adding data for block reuse (only set on first iteration) - int16_t m_srcPixels[PIXELS]; - int16_t m_redMin; - int16_t m_redMax; - - // this can all be encoded/decoded from the EAC block - int16_t m_redBase; - int16_t m_redMultiplier; - uint8_t m_redSelectors[PIXELS]; - uint8_t m_redModifierTableIndex; - - bool m_isDone; - bool m_isSnorm; // shifts fBase by 128 - - // this is only data needed to reiterate, can decode and build up rest - uint8_t m_uiEncodingIterations; - float m_fError; // 22-bits + 4-bits = 26 bits - }; - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +//#include "EtcBlock4x4Encoding_RGB8.h" + +namespace Etc +{ + class Block4x4EncodingBits_R11; + + // ################################################################################ + // Block4x4Encoding_R11 + // ################################################################################ + + // Simpler interface for R11 and RG11 without all the code/data from Block4x4. + class IBlockEncoding + { + public: + virtual ~IBlockEncoding() {} + + // setup block for encoding iteration, isDone() true when finished + virtual void Encode( + const float *sourcePixels,uint8_t *encodingBits, bool isSnorm) = 0; + + // this is for decoding a block in multipass + virtual void Decode( + uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, + uint16_t lastIterationCount + ) = 0; + + // for decoding a block for display or conversion + virtual void DecodeOnly( + const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) = 0; + + // iterate to reduce the error + virtual void PerformIteration(float a_fEffort) = 0; + + // write out block + virtual void SetEncodingBits(void) = 0; + + // when error is zero, or effort level also limits iteration + virtual bool IsDone() const = 0; + + virtual uint16_t GetIterationCount() const = 0; + + virtual float GetError() const = 0; + }; + + class Block4x4Encoding_R11 : public IBlockEncoding + { + public: + + Block4x4Encoding_R11(void); + virtual ~Block4x4Encoding_R11(void); + + // setup block for encoding iteration, isDone() true when finished + virtual void Encode( + const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) override; + + // this is for decoding a block in multipass + virtual void Decode( + uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, + uint16_t lastIterationCount) override; + + // for decoding a block for display or conversion + virtual void DecodeOnly( + const uint8_t *encodingBits, + float *decodedPixels, + bool isSnorm) override; + + virtual void 
PerformIteration(float a_fEffort) override; + + virtual void SetEncodingBits(void) override; + + virtual bool IsDone() const override { return m_isDone; } + + // done bit embedded into high bit of each 8-bit count + virtual uint16_t GetIterationCount() const override + { + uint16_t count = m_uiEncodingIterations; + if (m_isDone) + { + count |= 0x80; // done high bit + } + return count; + } + + virtual float GetError() const override { return m_fError; } + + private: + void CalculateR11(unsigned int a_uiSelectorsUsed, + int a_fBaseRadius, int a_fMultiplierRadius); + + Block4x4EncodingBits_R11 *m_pencodingbitsR11; + + //float m_fRedBlockError; + + static const int PIXELS = 16; // 4 * 4 + + // adding data for block reuse (only set on first iteration) + int16_t m_srcPixels[PIXELS]; + int16_t m_redMin; + int16_t m_redMax; + + // this can all be encoded/decoded from the EAC block + int16_t m_redBase; + int16_t m_redMultiplier; + uint8_t m_redSelectors[PIXELS]; + uint8_t m_redModifierTableIndex; + + bool m_isDone; + bool m_isSnorm; // shifts fBase by 128 + + // this is only data needed to reiterate, can decode and build up rest + uint8_t m_uiEncodingIterations; + float m_fError; // 22-bits + 4-bits = 26 bits + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RG11.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_RG11.cpp index 960f083a..ee2e8569 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RG11.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RG11.cpp @@ -1,68 +1,68 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_RG11.cpp - -Block4x4Encoding_RG11 is the encoder to use when targetting file format RG11 and SRG11 (signed RG11). - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_RG11.h" - -namespace Etc -{ - Block4x4Encoding_RG11::Block4x4Encoding_RG11(void) - { - } - - Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {} - - void Block4x4Encoding_RG11::Encode( - const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) - { - m_red.Encode(sourcePixels + 0, encodingBits, isSnorm); - m_green.Encode(sourcePixels + 1, encodingBits + 8, isSnorm); - } - - void Block4x4Encoding_RG11::Decode( - unsigned char *encodingBits, const float *sourcePixels, bool isSnorm, - uint16_t lastIteration) - { - m_red.Decode(encodingBits, sourcePixels, isSnorm, (lastIteration >> 0) & 0xFF); - m_green.Decode(encodingBits + 8, sourcePixels + 1, isSnorm, (lastIteration >> 8) & 0xFF); - } - - void Block4x4Encoding_RG11::DecodeOnly( - const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) - { - m_red.DecodeOnly(encodingBits, decodedPixels, isSnorm); - m_green.DecodeOnly(encodingBits + 8, decodedPixels + 1, isSnorm); - } - - void Block4x4Encoding_RG11::PerformIteration(float a_fEffort) - { - m_red.PerformIteration(a_fEffort); - m_green.PerformIteration(a_fEffort); - } - - void Block4x4Encoding_RG11::SetEncodingBits(void) - { - m_red.SetEncodingBits(); - m_green.SetEncodingBits(); - } -} +/* + * Copyright 2015 The Etc2Comp Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RG11.cpp + +Block4x4Encoding_RG11 is the encoder to use when targetting file format RG11 and SRG11 (signed RG11). + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RG11.h" + +namespace Etc +{ + Block4x4Encoding_RG11::Block4x4Encoding_RG11(void) + { + } + + Block4x4Encoding_RG11::~Block4x4Encoding_RG11(void) {} + + void Block4x4Encoding_RG11::Encode( + const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) + { + m_red.Encode(sourcePixels + 0, encodingBits, isSnorm); + m_green.Encode(sourcePixels + 1, encodingBits + 8, isSnorm); + } + + void Block4x4Encoding_RG11::Decode( + unsigned char *encodingBits, const float *sourcePixels, bool isSnorm, + uint16_t lastIteration) + { + m_red.Decode(encodingBits, sourcePixels, isSnorm, (lastIteration >> 0) & 0xFF); + m_green.Decode(encodingBits + 8, sourcePixels + 1, isSnorm, (lastIteration >> 8) & 0xFF); + } + + void Block4x4Encoding_RG11::DecodeOnly( + const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) + { + m_red.DecodeOnly(encodingBits, decodedPixels, isSnorm); + m_green.DecodeOnly(encodingBits + 8, decodedPixels + 1, isSnorm); + } + + void Block4x4Encoding_RG11::PerformIteration(float a_fEffort) + { + m_red.PerformIteration(a_fEffort); + m_green.PerformIteration(a_fEffort); + } + + void Block4x4Encoding_RG11::SetEncodingBits(void) + { + m_red.SetEncodingBits(); + m_green.SetEncodingBits(); + } +} diff 
--git a/libkram/etc2comp/EtcBlock4x4Encoding_RG11.h b/libkram/etc2comp/EtcBlock4x4Encoding_RG11.h index 71ed4b9a..eca31b0d 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RG11.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RG11.h @@ -1,66 +1,66 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -//#include "EtcBlock4x4Encoding_RGB8.h" -#include "EtcBlock4x4Encoding_R11.h" - -namespace Etc -{ - class Block4x4EncodingBits_RG11; - - // ################################################################################ - // Block4x4Encoding_RG11 - // ################################################################################ - - class Block4x4Encoding_RG11 : public IBlockEncoding - { - public: - - Block4x4Encoding_RG11(void); - virtual ~Block4x4Encoding_RG11(void); - - virtual void Encode( - const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) override; - - virtual void Decode( - uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, uint16_t lastIteration) override; - - virtual void DecodeOnly( - const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) override; - - virtual void PerformIteration(float a_fEffort) override; - - virtual void SetEncodingBits() override; - - virtual bool IsDone() const override { return m_red.IsDone() && m_green.IsDone(); } - - // done bit embedded into high bit of each 8-bit count - // because r and g can be done independently, and 
with multipass need to skip iteration, though decode/re-encode will occur - virtual uint16_t GetIterationCount() const override { return m_red.GetIterationCount() + (m_green.GetIterationCount() << 8); } - - virtual float GetError() const override { return m_red.GetError() + m_green.GetError(); } - - private: - Block4x4Encoding_R11 m_red; - Block4x4Encoding_R11 m_green; - }; - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +//#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcBlock4x4Encoding_R11.h" + +namespace Etc +{ + class Block4x4EncodingBits_RG11; + + // ################################################################################ + // Block4x4Encoding_RG11 + // ################################################################################ + + class Block4x4Encoding_RG11 : public IBlockEncoding + { + public: + + Block4x4Encoding_RG11(void); + virtual ~Block4x4Encoding_RG11(void); + + virtual void Encode( + const float *sourcePixels, uint8_t *encodingBits, bool isSnorm) override; + + virtual void Decode( + uint8_t *encodingBits, const float *sourcePixels, bool isSnorm, uint16_t lastIteration) override; + + virtual void DecodeOnly( + const uint8_t *encodingBits, float *decodedPixels, bool isSnorm) override; + + virtual void PerformIteration(float a_fEffort) override; + + virtual void SetEncodingBits() override; + + virtual bool IsDone() const override { return m_red.IsDone() && m_green.IsDone(); } + + // done bit embedded into high bit of each 8-bit count + // because r and g can be done independently, and with multipass need to skip iteration, though decode/re-encode will occur + virtual uint16_t GetIterationCount() const override { return m_red.GetIterationCount() + (m_green.GetIterationCount() << 8); } + + virtual float GetError() const override { return m_red.GetError() + m_green.GetError(); } + + private: + Block4x4Encoding_R11 m_red; + Block4x4Encoding_R11 m_green; + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.cpp index a6f0f125..3d6786cf 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.cpp @@ -1,1796 +1,1801 @@ -/* - * Copyright 2015 The Etc2Comp Authors. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_RGB8.cpp - -Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targetting file format RGB8. -This encoder is also used for the ETC2 subset of file format RGBA8. - -Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8. - -*/ - -// TODO: add isGray opimizations where rgb are iterated once for a single radius -// instead of as individual channels. - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_RGB8.h" - -#include "EtcBlock4x4EncodingBits.h" -#include "EtcBlock4x4.h" -#include "EtcMath.h" - -#include -#include -#include -#include -#include - -namespace Etc -{ - float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] = - { - 3.0f / 255.0f, - 6.0f / 255.0f, - 11.0f / 255.0f, - 16.0f / 255.0f, - 23.0f / 255.0f, - 32.0f / 255.0f, - 41.0f / 255.0f, - 64.0f / 255.0f - }; - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void) - { - - m_pencodingbitsRGB8 = nullptr; - - } - - Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {} - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource 
points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits of a previous encoding - // - void Block4x4Encoding_RGB8::Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount ) - { - - // handle ETC1 modes - Block4x4Encoding_ETC1::Decode(a_pblockParent, - a_paucEncodingBits, a_pafrgbaSource,a_errormetric, iterationCount); - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; - - // detect if there is a T, H or Planar mode present - if (m_pencodingbitsRGB8->differential.diff) - { - int iRed1 = (int)m_pencodingbitsRGB8->differential.red1; - int iDRed2 = m_pencodingbitsRGB8->differential.dred2; - int iRed2 = iRed1 + iDRed2; - - int iGreen1 = (int)m_pencodingbitsRGB8->differential.green1; - int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2; - int iGreen2 = iGreen1 + iDGreen2; - - int iBlue1 = (int)m_pencodingbitsRGB8->differential.blue1; - int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2; - int iBlue2 = iBlue1 + iDBlue2; - - if (iRed2 < 0 || iRed2 > 31) - { - InitFromEncodingBits_T(); - } - else if (iGreen2 < 0 || iGreen2 > 31) - { - InitFromEncodingBits_H(); - } - else if (iBlue2 < 0 || iBlue2 > 31) - { - InitFromEncodingBits_Planar(); - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding if T mode is detected - // - void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void) - { - - m_mode = MODE_T; - - unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) + - m_pencodingbitsRGB8->t.red1b); - unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1; - unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1; - - unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2; - unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2; - unsigned char 
ucBlue2 = m_pencodingbitsRGB8->t.blue2; - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); - - m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db; - - Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); - - DecodePixels_T(); - - CalcBlockError(); - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding if H mode is detected - // - void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void) - { - - m_mode = MODE_H; - - unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1; - unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) + - m_pencodingbitsRGB8->h.green1b); - unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) + - (m_pencodingbitsRGB8->h.blue1b << 1) + - m_pencodingbitsRGB8->h.blue1c); - - unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2; - unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) + - m_pencodingbitsRGB8->h.green2b); - unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2; - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); - - // used to determine the LSB of the CW - unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1); - unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2); - - m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1); - if (uiRGB1 >= uiRGB2) - { - m_uiCW1++; - } - - Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); - - DecodePixels_H(); - - CalcBlockError(); - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization 
from the encoding bits of a previous encoding if Planar mode is detected - // - void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void) - { - - m_mode = MODE_PLANAR; - - unsigned char ucOriginRed = m_pencodingbitsRGB8->planar.originRed; - unsigned char ucOriginGreen = (unsigned char)((m_pencodingbitsRGB8->planar.originGreen1 << 6) + - m_pencodingbitsRGB8->planar.originGreen2); - unsigned char ucOriginBlue = (unsigned char)((m_pencodingbitsRGB8->planar.originBlue1 << 5) + - (m_pencodingbitsRGB8->planar.originBlue2 << 3) + - (m_pencodingbitsRGB8->planar.originBlue3 << 1) + - m_pencodingbitsRGB8->planar.originBlue4); - - unsigned char ucHorizRed = (unsigned char)((m_pencodingbitsRGB8->planar.horizRed1 << 1) + - m_pencodingbitsRGB8->planar.horizRed2); - unsigned char ucHorizGreen = m_pencodingbitsRGB8->planar.horizGreen; - unsigned char ucHorizBlue = (unsigned char)((m_pencodingbitsRGB8->planar.horizBlue1 << 5) + - m_pencodingbitsRGB8->planar.horizBlue2); - - unsigned char ucVertRed = (unsigned char)((m_pencodingbitsRGB8->planar.vertRed1 << 3) + - m_pencodingbitsRGB8->planar.vertRed2); - unsigned char ucVertGreen = (unsigned char)((m_pencodingbitsRGB8->planar.vertGreen1 << 2) + - m_pencodingbitsRGB8->planar.vertGreen2); - unsigned char ucVertBlue = m_pencodingbitsRGB8->planar.vertBlue; - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginRed, ucOriginGreen, ucOriginBlue); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizRed, ucHorizGreen, ucHorizBlue); - m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertRed, ucVertGreen, ucVertBlue); - - DecodePixels_Planar(); - - CalcBlockError(); - - } - - // ---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on 
a_fEffort - // - void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort) - { - assert(!m_boolDone); - - switch (m_uiEncodingIterations) - { - case 0: - Block4x4Encoding_ETC1::PerformFirstIteration(); - if (m_boolDone) - { - break; - } - - TryPlanar(0); - SetDoneIfPerfect(); - if (m_boolDone) - { - break; - } - - TryTAndH(0); - break; - - case 1: - Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); - break; - - case 2: - Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1); - break; - - case 3: - Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); - break; - - case 4: - Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1); - break; - - case 5: - TryPlanar(1); - if (a_fEffort <= 49.5f) - { - m_boolDone = true; - } - break; - - case 6: - TryTAndH(1); - if (a_fEffort <= 59.5f) - { - m_boolDone = true; - } - break; - - case 7: - Block4x4Encoding_ETC1::TryDegenerates1(); - if (a_fEffort <= 69.5f) - { - m_boolDone = true; - } - break; - - case 8: - Block4x4Encoding_ETC1::TryDegenerates2(); - if (a_fEffort <= 79.5f) - { - m_boolDone = true; - } - break; - - case 9: - Block4x4Encoding_ETC1::TryDegenerates3(); - if (a_fEffort <= 89.5f) - { - m_boolDone = true; - } - break; - - case 10: - Block4x4Encoding_ETC1::TryDegenerates4(); - m_boolDone = true; - break; - - default: - assert(0); - break; - } - - m_uiEncodingIterations++; - - SetDoneIfPerfect(); - } - - // ---------------------------------------------------------------------------------------------------- - // try encoding in Planar mode - // save this encoding if it improves the error - // - void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius) - { - Block4x4Encoding_RGB8 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_PLANAR; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - } - - encodingTry.CalculatePlanarCornerColors(); - - encodingTry.DecodePixels_Planar(); - - encodingTry.CalcBlockError(); - - if 
(a_uiRadius > 0) - { - encodingTry.TwiddlePlanar(); - } - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_PLANAR; - m_boolDiff = true; - m_boolFlip = false; - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_frgbaColor3 = encodingTry.m_frgbaColor3; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try encoding in T mode or H mode - // save this encoding if it improves the error - // - void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius) - { - - CalculateBaseColorsForTAndH(); - - TryT(a_uiRadius); - - if (!IsDone()) - { - TryH(a_uiRadius); - } - } - - // ---------------------------------------------------------------------------------------------------- - // calculate original values for base colors - // store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2 - // - void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void) - { - - //bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX; - - ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f; - - // find pixel farthest from average gray line - unsigned int uiFarthestPixel = 0; - float fFarthestGrayDistance2 = 0.0f; - //unsigned int uiTransparentPixels = 0; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - // don't count transparent -// if (m_pafrgbaSource[uiPixel].fA == 0.0f && !boolRGBX) -// { -// uiTransparentPixels++; -// } -// else - { - float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage); - - if (fGrayDistance2 > fFarthestGrayDistance2) - { - uiFarthestPixel = uiPixel; - fFarthestGrayDistance2 = fGrayDistance2; - } - } - } - // a transparent block should 
not reach this method - //assert(uiTransparentPixels < PIXELS); - - // set the original base colors to: - // half way to the farthest pixel and - // the mirror color on the other side of the average - ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f; - m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4(); - m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4(); // the "other side" might be out of range - - // move base colors to find best fit - for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++) - { - // find the center of pixels closest to each color - float fPixelsCloserToColor1 = 0.0f; - ColorFloatRGBA frgbSumPixelsCloserToColor1; - float fPixelsCloserToColor2 = 0.0f; - ColorFloatRGBA frgbSumPixelsCloserToColor2; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - // don't count transparent pixels - // Can't do this, use premul to weight the colors before they are encoded - float alpha = 1.0f; // m_pafrgbaSource[uiPixel].fA; -// if (alpha == 0.0f) -// { -// continue; -// } - - float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH); - float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH); - - ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * alpha; - - if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2) - { - fPixelsCloserToColor1 += alpha; - frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource; - } - else - { - fPixelsCloserToColor2 += alpha; - frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource; - } - } - if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f) - { - break; - } - - ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4(); - ColorFloatRGBA 
frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4(); - - if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR && - frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG && - frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB && - - frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR && - frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG && - frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB) - { - break; - } - - m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels; - m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels; - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try encoding in T mode - // save this encoding if it improves the error - // - // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently - // better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower - // - void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius) - { - Block4x4Encoding_RGB8 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_T; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - encodingTry.m_fError = FLT_MAX; - } - - int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); - int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); - int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); - - int iMinRed1 = iColor1Red - (int)a_uiRadius; - int iMinGreen1 = iColor1Green - (int)a_uiRadius; - int iMinBlue1 = iColor1Blue - (int)a_uiRadius; - - int iMaxRed1 = iColor1Red + (int)a_uiRadius; - int iMaxGreen1 = iColor1Green + (int)a_uiRadius; - int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; - - if (iMinRed1 < 0) - { - iMinRed1 = 0; - } - if (iMinGreen1 < 0) - { - iMinGreen1 = 0; - } - if (iMinBlue1 < 0) - { - iMinBlue1 = 0; - } - - if (iMaxRed1 > 15) - { - iMaxRed1 = 15; - } - if (iMaxGreen1 > 15) - { - 
iMaxGreen1 = 15; - } - if (iMaxBlue1 > 15) - { - iMaxBlue1 = 15; - } - - int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); - int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); - int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); - - int iMinRed2 = iColor2Red - (int)a_uiRadius; - int iMinGreen2 = iColor2Green - (int)a_uiRadius; - int iMinBlue2 = iColor2Blue - (int)a_uiRadius; - - int iMaxRed2 = iColor2Red + (int)a_uiRadius; - int iMaxGreen2 = iColor2Green + (int)a_uiRadius; - int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; - - if (iMinRed2 < 0) - { - iMinRed2 = 0; - } - if (iMinGreen2 < 0) - { - iMinGreen2 = 0; - } - if (iMinBlue2 < 0) - { - iMinBlue2 = 0; - } - - if (iMaxRed2 > 15) - { - iMaxRed2 = 15; - } - if (iMaxGreen2 > 15) - { - iMaxGreen2 = 15; - } - if (iMaxBlue2 > 15) - { - iMaxBlue2 = 15; - } - - bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); - - for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) - { - encodingTry.m_uiCW1 = uiDistance; - - // twiddle m_frgbaOriginalColor2_TAndH - // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector - // - for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) - { - for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) - { - for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) - { - if (isGray && (iRed2 != iGreen2 || iRed2 != iBlue2)) - { - continue; - } - - for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) - { - if (uiBaseColorSwaps == 0) - { - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - } - else - { - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; - } - - 
encodingTry.TryT_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - SetDoneIfPerfect(); - if (IsDone()) - { - return; - } - } - } - } - } - } - - // twiddle m_frgbaOriginalColor1_TAndH - for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) - { - for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) - { - for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) - { - if (isGray && (iRed1 != iGreen1 || iRed1 != iBlue1)) - { - continue; - } - - for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) - { - if (uiBaseColorSwaps == 0) - { - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; - } - else - { - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - } - - encodingTry.TryT_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = 
encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - SetDoneIfPerfect(); - if (IsDone()) - { - return; - } - } - } - } - } - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // find best selector combination for TryT - // called on an encodingTry - // - void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - - unsigned int auiBestPixelSelectors[PIXELS]; - float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; - ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; - - assert(SELECTORS == 4); - afrgbaDecodedPixel[0] = m_frgbaColor1; - afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB(); - afrgbaDecodedPixel[2] = m_frgbaColor2; - afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); - - // try each selector - for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - - float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], - uiPixel); - - if (fPixelError < afBestPixelErrors[uiPixel]) - { - afBestPixelErrors[uiPixel] = fPixelError; - auiBestPixelSelectors[uiPixel] = uiSelector; - afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; - } - } - } - - - // add up all of the pixel errors - float fBlockError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - fBlockError += afBestPixelErrors[uiPixel]; - } - - if (fBlockError < m_fError) - { - m_fError = fBlockError; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; - } - } - - } - - // 
---------------------------------------------------------------------------------------------------- - // try encoding in T mode - // save this encoding if it improves the error - // - // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently - // TWIDDLE_RADIUS of 2 is WAY too slow - // - void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius) - { - Block4x4Encoding_RGB8 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_H; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - encodingTry.m_fError = FLT_MAX; - } - - int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); - int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); - int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); - - int iMinRed1 = iColor1Red - (int)a_uiRadius; - int iMaxRed1 = iColor1Red + (int)a_uiRadius; - int iMinGreen1 = iColor1Green - (int)a_uiRadius; - int iMaxGreen1 = iColor1Green + (int)a_uiRadius; - int iMinBlue1 = iColor1Blue - (int)a_uiRadius; - int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; - - if (iMinRed1 < 0) - { - iMinRed1 = 0; - } - if (iMaxRed1 > 15) - { - iMaxRed1 = 15; - } - if (iMinGreen1 < 0) - { - iMinGreen1 = 0; - } - if (iMaxGreen1 > 15) - { - iMaxGreen1 = 15; - } - if (iMinBlue1 < 0) - { - iMinBlue1 = 0; - } - if (iMaxBlue1 > 15) - { - iMaxBlue1 = 15; - } - - int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); - int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); - int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); - - int iMinRed2 = iColor2Red - (int)a_uiRadius; - int iMaxRed2 = iColor2Red + (int)a_uiRadius; - int iMinGreen2 = iColor2Green - (int)a_uiRadius; - int iMaxGreen2 = iColor2Green + (int)a_uiRadius; - int iMinBlue2 = iColor2Blue - (int)a_uiRadius; - int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; - - if (iMinRed2 < 0) - { - iMinRed2 = 0; - } - if (iMaxRed2 > 15) - { - iMaxRed2 = 15; - } - - if (iMinGreen2 < 0) - { - iMinGreen2 = 0; - } 
- if (iMaxGreen2 > 15) - { - iMaxGreen2 = 15; - } - - if (iMinBlue2 < 0) - { - iMinBlue2 = 0; - } - if (iMaxBlue2 > 15) - { - iMaxBlue2 = 15; - } - - bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); - - for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) - { - encodingTry.m_uiCW1 = uiDistance; - - // twiddle m_frgbaOriginalColor1_TAndH - for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) - { - for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) - { - for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) - { - // gray only iterates red - if (isGray && (iRed1 != iGreen1 || iRed1 != iBlue1)) - { - continue; - } - - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; - - // if color1 == color2, H encoding issues can pop up, so abort - if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) - { - continue; - } - - encodingTry.TryH_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - SetDoneIfPerfect(); - if (IsDone()) - { - return; - } - } - } - } - } - - // twiddle m_frgbaOriginalColor2_TAndH - for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) - { - for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) - { - for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) - { - // gray only iterates red - if (isGray && (iRed2 != 
iGreen2 || iRed2 != iBlue2)) - { - continue; - } - - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - - // if color1 == color2, H encoding issues can pop up, so abort - if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) - { - continue; - } - - encodingTry.TryH_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - SetDoneIfPerfect(); - if (IsDone()) - { - return; - } - } - } - } - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // find best selector combination for TryH - // called on an encodingTry - // - void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - - unsigned int auiBestPixelSelectors[PIXELS]; - float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; - ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; - - assert(SELECTORS == 4); - afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); - afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); - afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB(); - afrgbaDecodedPixel[3] = (m_frgbaColor2 - 
fDistance).ClampRGB(); - - // try each selector - for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - - float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], - uiPixel); - - if (fPixelError < afBestPixelErrors[uiPixel]) - { - afBestPixelErrors[uiPixel] = fPixelError; - auiBestPixelSelectors[uiPixel] = uiSelector; - afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; - } - } - } - - - // add up all of the pixel errors - float fBlockError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - fBlockError += afBestPixelErrors[uiPixel]; - } - - if (m_fError > fBlockError) - { - m_fError = fBlockError; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // use linear regression to find the best fit for colors along the edges of the 4x4 block - // - void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void) - { - ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE]; - ColorFloatRGBA frgbaSlope; - ColorFloatRGBA frgbaOffset; - - // top edge - afrgbaRegression[0] = m_pafrgbaSource[0]; - afrgbaRegression[1] = m_pafrgbaSource[4]; - afrgbaRegression[2] = m_pafrgbaSource[8]; - afrgbaRegression[3] = m_pafrgbaSource[12]; - ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); - m_frgbaColor1 = frgbaOffset; - m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset; - - // left edge - afrgbaRegression[0] = m_pafrgbaSource[0]; - afrgbaRegression[1] = m_pafrgbaSource[1]; - afrgbaRegression[2] = m_pafrgbaSource[2]; - afrgbaRegression[3] = m_pafrgbaSource[3]; - ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); - m_frgbaColor1 = (m_frgbaColor1 + 
frgbaOffset) * 0.5f; // average with top edge - m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset; - - // right edge - afrgbaRegression[0] = m_pafrgbaSource[12]; - afrgbaRegression[1] = m_pafrgbaSource[13]; - afrgbaRegression[2] = m_pafrgbaSource[14]; - afrgbaRegression[3] = m_pafrgbaSource[15]; - ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); - m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f; // average with top edge - - // bottom edge - afrgbaRegression[0] = m_pafrgbaSource[3]; - afrgbaRegression[1] = m_pafrgbaSource[7]; - afrgbaRegression[2] = m_pafrgbaSource[11]; - afrgbaRegression[3] = m_pafrgbaSource[15]; - ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); - m_frgbaColor3 = (m_frgbaColor3 + frgbaOffset) * 0.5f; // average with left edge - - // quantize corner colors to 6/7/6 - m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6(); - m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6(); - m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6(); - - } - - // ---------------------------------------------------------------------------------------------------- - // try different corner colors by slightly changing R, G and B independently - // - // R, G and B decoding and errors are independent, so R, G and B twiddles can be independent - // - // return true if improvement - // - bool Block4x4Encoding_RGB8::TwiddlePlanar(void) - { - bool boolImprovement = false; - bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); - - while (TwiddlePlanarR()) - { - boolImprovement = true; - } - - if (!isGray) { - while (TwiddlePlanarG()) - { - boolImprovement = true; - } - - while (TwiddlePlanarB()) - { - boolImprovement = true; - } - } - - return boolImprovement; - } - - // ---------------------------------------------------------------------------------------------------- - // try different corner colors by slightly changing R - // - bool Block4x4Encoding_RGB8::TwiddlePlanarR() - { - bool boolImprovement = false; - - Block4x4Encoding_RGB8 
encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_PLANAR; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - } - - int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f); - int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f); - int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f); - - for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++) - { - // check for out of range - if (iTryOriginRed < 0 || iTryOriginRed > 63) - { - continue; - } - - encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f; - - for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++) - { - // check for out of range - if (iTryHorizRed < 0 || iTryHorizRed > 63) - { - continue; - } - - encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f; - - for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++) - { - // check for out of range - if (iTryVertRed < 0 || iTryVertRed > 63) - { - continue; - } - - // don't bother with null twiddle - if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed) - { - continue; - } - - encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f; - - encodingTry.DecodePixels_Planar(); - - encodingTry.CalcBlockError(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_PLANAR; - m_boolDiff = true; - m_boolFlip = false; - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_frgbaColor3 = encodingTry.m_frgbaColor3; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - boolImprovement = true; - } - } - } - } - - return boolImprovement; - } - - // ---------------------------------------------------------------------------------------------------- - 
// try different corner colors by slightly changing G - // - bool Block4x4Encoding_RGB8::TwiddlePlanarG() - { - bool boolImprovement = false; - - Block4x4Encoding_RGB8 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_PLANAR; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - } - - int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f); - int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f); - int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f); - - for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++) - { - // check for out of range - if (iTryOriginGreen < 0 || iTryOriginGreen > 127) - { - continue; - } - - encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f; - - for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++) - { - // check for out of range - if (iTryHorizGreen < 0 || iTryHorizGreen > 127) - { - continue; - } - - encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f; - - for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++) - { - // check for out of range - if (iTryVertGreen < 0 || iTryVertGreen > 127) - { - continue; - } - - // don't bother with null twiddle - if (iTryOriginGreen == iOriginGreen && - iTryHorizGreen == iHorizGreen && - iTryVertGreen == iVertGreen) - { - continue; - } - - encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f; - - encodingTry.DecodePixels_Planar(); - - encodingTry.CalcBlockError(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_PLANAR; - m_boolDiff = true; - m_boolFlip = false; - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_frgbaColor3 = encodingTry.m_frgbaColor3; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = 
encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - boolImprovement = true; - } - } - } - } - - return boolImprovement; - } - - // ---------------------------------------------------------------------------------------------------- - // try different corner colors by slightly changing B - // - bool Block4x4Encoding_RGB8::TwiddlePlanarB() - { - bool boolImprovement = false; - - Block4x4Encoding_RGB8 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_PLANAR; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - } - - int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f); - int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f); - int iVertBlue = encodingTry.m_frgbaColor3.IntBlue(63.0f); - - for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++) - { - // check for out of range - if (iTryOriginBlue < 0 || iTryOriginBlue > 63) - { - continue; - } - - encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f; - - for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++) - { - // check for out of range - if (iTryHorizBlue < 0 || iTryHorizBlue > 63) - { - continue; - } - - encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f; - - for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++) - { - // check for out of range - if (iTryVertBlue < 0 || iTryVertBlue > 63) - { - continue; - } - - // don't bother with null twiddle - if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue) - { - continue; - } - - encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f; - - encodingTry.DecodePixels_Planar(); - - encodingTry.CalcBlockError(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_PLANAR; - m_boolDiff = true; - m_boolFlip = false; - m_frgbaColor1 = 
encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_frgbaColor3 = encodingTry.m_frgbaColor3; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - - boolImprovement = true; - } - } - } - } - - return boolImprovement; - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state - // - void Block4x4Encoding_RGB8::SetEncodingBits(void) - { - - switch (m_mode) - { - case MODE_ETC1: - Block4x4Encoding_ETC1::SetEncodingBits(); - break; - - case MODE_T: - SetEncodingBits_T(); - break; - - case MODE_H: - SetEncodingBits_H(); - break; - - case MODE_PLANAR: - SetEncodingBits_Planar(); - break; - - default: - assert(false); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state for T mode - // - void Block4x4Encoding_RGB8::SetEncodingBits_T(void) - { - static const bool SANITY_CHECK = true; - - assert(m_mode == MODE_T); - assert(m_boolDiff == true); - - unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); - unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); - unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); - - unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); - unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); - unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); - - m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2; - m_pencodingbitsRGB8->t.red1b = uiRed1; - m_pencodingbitsRGB8->t.green1 = uiGreen1; - m_pencodingbitsRGB8->t.blue1 = uiBlue1; - - m_pencodingbitsRGB8->t.red2 = uiRed2; - m_pencodingbitsRGB8->t.green2 = uiGreen2; - m_pencodingbitsRGB8->t.blue2 = uiBlue2; - - m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1; - 
m_pencodingbitsRGB8->t.db = m_uiCW1; - - m_pencodingbitsRGB8->t.diff = 1; - - Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); - - // create an invalid R differential to trigger T mode - m_pencodingbitsRGB8->t.detect1 = 0; - m_pencodingbitsRGB8->t.detect2 = 0; - int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - if (iRed2 >= 4) - { - m_pencodingbitsRGB8->t.detect1 = 7; - m_pencodingbitsRGB8->t.detect2 = 0; - } - else - { - m_pencodingbitsRGB8->t.detect1 = 0; - m_pencodingbitsRGB8->t.detect2 = 1; - } - - if (SANITY_CHECK) - { - iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - - // make sure red overflows - assert(iRed2 < 0 || iRed2 > 31); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state for H mode - // - // colors and selectors may need to swap in order to generate lsb of distance index - // - void Block4x4Encoding_RGB8::SetEncodingBits_H(void) - { - static const bool SANITY_CHECK = true; - - assert(m_mode == MODE_H); - assert(m_boolDiff == true); - - unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); - unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); - unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); - - unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); - unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); - unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); - - unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; - unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; - - bool boolOddDistance = m_uiCW1 & 1; - bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance; - - if (boolSwapColors) - { - m_pencodingbitsRGB8->h.red1 = uiRed2; - m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1; - 
m_pencodingbitsRGB8->h.green1b = uiGreen2; - m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3; - m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1; - m_pencodingbitsRGB8->h.blue1c = uiBlue2; - - m_pencodingbitsRGB8->h.red2 = uiRed1; - m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1; - m_pencodingbitsRGB8->h.green2b = uiGreen1; - m_pencodingbitsRGB8->h.blue2 = uiBlue1; - - m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; - m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; - } - else - { - m_pencodingbitsRGB8->h.red1 = uiRed1; - m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1; - m_pencodingbitsRGB8->h.green1b = uiGreen1; - m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3; - m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1; - m_pencodingbitsRGB8->h.blue1c = uiBlue1; - - m_pencodingbitsRGB8->h.red2 = uiRed2; - m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1; - m_pencodingbitsRGB8->h.green2b = uiGreen2; - m_pencodingbitsRGB8->h.blue2 = uiBlue2; - - m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; - m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; - } - - m_pencodingbitsRGB8->h.diff = 1; - - Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); - - if (boolSwapColors) - { - m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF; - } - - // create an invalid R differential to trigger T mode - m_pencodingbitsRGB8->h.detect1 = 0; - m_pencodingbitsRGB8->h.detect2 = 0; - m_pencodingbitsRGB8->h.detect3 = 0; - int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - if (iRed2 < 0 || iRed2 > 31) - { - m_pencodingbitsRGB8->h.detect1 = 1; - } - if (iGreen2 >= 4) - { - m_pencodingbitsRGB8->h.detect2 = 7; - m_pencodingbitsRGB8->h.detect3 = 0; - } - else - { - m_pencodingbitsRGB8->h.detect2 = 0; - m_pencodingbitsRGB8->h.detect3 = 1; - } - - if (SANITY_CHECK) - { - iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - iGreen2 = 
(int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - - // make sure red doesn't overflow and green does - assert(iRed2 >= 0 && iRed2 <= 31); - assert(iGreen2 < 0 || iGreen2 > 31); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state for Planar mode - // - void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void) - { - static const bool SANITY_CHECK = true; - - assert(m_mode == MODE_PLANAR); - assert(m_boolDiff == true); - - unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f); - unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f); - unsigned int uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f); - - unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f); - unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f); - unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f); - - unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f); - unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f); - unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f); - - m_pencodingbitsRGB8->planar.originRed = uiOriginRed; - m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6; - m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen; - m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5; - m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3; - m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1; - m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue; - - m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1; - m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed; - m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen; - m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5; - m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue; - - 
m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3; - m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed; - m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2; - m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen; - m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue; - - m_pencodingbitsRGB8->planar.diff = 1; - - // create valid RG differentials and an invalid B differential to trigger planar mode - m_pencodingbitsRGB8->planar.detect1 = 0; - m_pencodingbitsRGB8->planar.detect2 = 0; - m_pencodingbitsRGB8->planar.detect3 = 0; - m_pencodingbitsRGB8->planar.detect4 = 0; - int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2; - if (iRed2 < 0 || iRed2 > 31) - { - m_pencodingbitsRGB8->planar.detect1 = 1; - } - if (iGreen2 < 0 || iGreen2 > 31) - { - m_pencodingbitsRGB8->planar.detect2 = 1; - } - if (iBlue2 >= 4) - { - m_pencodingbitsRGB8->planar.detect3 = 7; - m_pencodingbitsRGB8->planar.detect4 = 0; - } - else - { - m_pencodingbitsRGB8->planar.detect3 = 0; - m_pencodingbitsRGB8->planar.detect4 = 1; - } - - if (SANITY_CHECK) - { - iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2; - - // make sure red and green don't overflow and blue does - assert(iRed2 >= 0 && iRed2 <= 31); - assert(iGreen2 >= 0 && iGreen2 <= 31); - assert(iBlue2 < 0 || iBlue2 > 31); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the decoded colors and decoded alpha based on the encoding state 
for T mode - // - void Block4x4Encoding_RGB8::DecodePixels_T(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - switch (m_auiSelectors[uiPixel]) - { - case 0: - m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1; - break; - - case 1: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); - break; - - case 2: - m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2; - break; - - case 3: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); - break; - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the decoded colors and decoded alpha based on the encoding state for H mode - // - void Block4x4Encoding_RGB8::DecodePixels_H(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - switch (m_auiSelectors[uiPixel]) - { - case 0: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB(); - break; - - case 1: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB(); - break; - - case 2: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); - break; - - case 3: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); - break; - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the decoded colors and decoded alpha based on the encoding state for Planar mode - // - void Block4x4Encoding_RGB8::DecodePixels_Planar(void) - { - - int iRO = m_frgbaColor1.IntRed(255.0f); - int iGO = m_frgbaColor1.IntGreen(255.0f); - int iBO = m_frgbaColor1.IntBlue(255.0f); - - int iRH = m_frgbaColor2.IntRed(255.0f); - int 
iGH = m_frgbaColor2.IntGreen(255.0f); - int iBH = m_frgbaColor2.IntBlue(255.0f); - - int iRV = m_frgbaColor3.IntRed(255.0f); - int iGV = m_frgbaColor3.IntGreen(255.0f); - int iBV = m_frgbaColor3.IntBlue(255.0f); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int iX = (int)(uiPixel >> 2); - int iY = (int)(uiPixel & 3); - - int iR = (iX*(iRH - iRO) + iY*(iRV - iRO) + 4*iRO + 2) >> 2; - int iG = (iX*(iGH - iGO) + iY*(iGV - iGO) + 4*iGO + 2) >> 2; - int iB = (iX*(iBH - iBO) + iY*(iBV - iBO) + 4*iBO + 2) >> 2; - - ColorFloatRGBA frgba; - frgba.fR = (float)iR / 255.0f; - frgba.fG = (float)iG / 255.0f; - frgba.fB = (float)iB / 255.0f; - frgba.fA = 1.0f; - - m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB(); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // perform a linear regression for the a_uiPixels in a_pafrgbaPixels[] - // - // output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset - // - void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, - ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset) - { - typedef struct - { - float f[4]; - } Float4; - - Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels); - Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope); - Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset); - - float afX[MAX_PLANAR_REGRESSION_SIZE]; - float afY[MAX_PLANAR_REGRESSION_SIZE]; - - // handle r, g and b separately. 
don't bother with a - for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++) - { - for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++) - { - afX[uiPixel] = (float)uiPixel; - afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent]; - - } - Etc::Regression(afX, afY, a_uiPixels, - &(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent])); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGB8.cpp + +Block4x4Encoding_RGB8 is the encoder to use for the ETC2 extensions when targetting file format RGB8. +This encoder is also used for the ETC2 subset of file format RGBA8. + +Block4x4Encoding_ETC1 encodes the ETC1 subset of RGB8. + +*/ + +// TODO: add isGray opimizations where rgb are iterated once for a single radius +// instead of as individual channels. 
+ +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGB8.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" +#include "EtcMath.h" + +#include +#include +#include +#include +#include + +namespace Etc +{ + float Block4x4Encoding_RGB8::s_afTHDistanceTable[TH_DISTANCES] = + { + 3.0f / 255.0f, + 6.0f / 255.0f, + 11.0f / 255.0f, + 16.0f / 255.0f, + 23.0f / 255.0f, + 32.0f / 255.0f, + 41.0f / 255.0f, + 64.0f / 255.0f + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGB8::Block4x4Encoding_RGB8(void) + { + + m_pencodingbitsRGB8 = nullptr; + + } + + Block4x4Encoding_RGB8::~Block4x4Encoding_RGB8(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RGB8::Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount ) + { + + // handle ETC1 modes + Block4x4Encoding_ETC1::Decode(a_pblockParent, + a_paucEncodingBits, a_pafrgbaSource,a_errormetric, iterationCount); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + // detect if there is a T, H or Planar mode present + if (m_pencodingbitsRGB8->differential.diff) + { + int iRed1 = (int)m_pencodingbitsRGB8->differential.red1; + int iDRed2 = m_pencodingbitsRGB8->differential.dred2; + int iRed2 = iRed1 + iDRed2; + + int iGreen1 = (int)m_pencodingbitsRGB8->differential.green1; + int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2; + int iGreen2 = iGreen1 + iDGreen2; + 
+ int iBlue1 = (int)m_pencodingbitsRGB8->differential.blue1; + int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2; + int iBlue2 = iBlue1 + iDBlue2; + + if (iRed2 < 0 || iRed2 > 31) + { + InitFromEncodingBits_T(); + } + else if (iGreen2 < 0 || iGreen2 > 31) + { + InitFromEncodingBits_H(); + } + else if (iBlue2 < 0 || iBlue2 > 31) + { + InitFromEncodingBits_Planar(); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if T mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_T(void) + { + + m_mode = MODE_T; + + unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) + + m_pencodingbitsRGB8->t.red1b); + unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1; + unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1; + + unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2; + unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2; + unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db; + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_T(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if H mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_H(void) + { + + m_mode = MODE_H; + + unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1; + unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) + + m_pencodingbitsRGB8->h.green1b); + unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) + + (m_pencodingbitsRGB8->h.blue1b << 1) + + 
m_pencodingbitsRGB8->h.blue1c); + + unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2; + unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) + + m_pencodingbitsRGB8->h.green2b); + unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + // used to determine the LSB of the CW + unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1); + unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1); + if (uiRGB1 >= uiRGB2) + { + m_uiCW1++; + } + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_H(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if Planar mode is detected + // + void Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(void) + { + + m_mode = MODE_PLANAR; + + unsigned char ucOriginRed = m_pencodingbitsRGB8->planar.originRed; + unsigned char ucOriginGreen = (unsigned char)((m_pencodingbitsRGB8->planar.originGreen1 << 6) + + m_pencodingbitsRGB8->planar.originGreen2); + unsigned char ucOriginBlue = (unsigned char)((m_pencodingbitsRGB8->planar.originBlue1 << 5) + + (m_pencodingbitsRGB8->planar.originBlue2 << 3) + + (m_pencodingbitsRGB8->planar.originBlue3 << 1) + + m_pencodingbitsRGB8->planar.originBlue4); + + unsigned char ucHorizRed = (unsigned char)((m_pencodingbitsRGB8->planar.horizRed1 << 1) + + m_pencodingbitsRGB8->planar.horizRed2); + unsigned char ucHorizGreen = m_pencodingbitsRGB8->planar.horizGreen; + unsigned char ucHorizBlue = (unsigned char)((m_pencodingbitsRGB8->planar.horizBlue1 << 5) + + m_pencodingbitsRGB8->planar.horizBlue2); 
+ + unsigned char ucVertRed = (unsigned char)((m_pencodingbitsRGB8->planar.vertRed1 << 3) + + m_pencodingbitsRGB8->planar.vertRed2); + unsigned char ucVertGreen = (unsigned char)((m_pencodingbitsRGB8->planar.vertGreen1 << 2) + + m_pencodingbitsRGB8->planar.vertGreen2); + unsigned char ucVertBlue = m_pencodingbitsRGB8->planar.vertBlue; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromR6G7B6(ucOriginRed, ucOriginGreen, ucOriginBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromR6G7B6(ucHorizRed, ucHorizGreen, ucHorizBlue); + m_frgbaColor3 = ColorFloatRGBA::ConvertFromR6G7B6(ucVertRed, ucVertGreen, ucVertBlue); + + DecodePixels_Planar(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8::PerformIteration(float a_fEffort) + { + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + Block4x4Encoding_ETC1::PerformFirstIteration(); + if (m_boolDone) + { + break; + } + + TryPlanar(0); + SetDoneIfPerfect(); + if (m_boolDone) + { + break; + } + + TryTAndH(0); + break; + + case 1: + Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + Block4x4Encoding_ETC1::TryIndividual(m_boolMostLikelyFlip, 1); + break; + + case 3: + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 4: + Block4x4Encoding_ETC1::TryIndividual(!m_boolMostLikelyFlip, 1); + break; + + case 5: + TryPlanar(1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 6: + TryTAndH(1); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 7: + Block4x4Encoding_ETC1::TryDegenerates1(); + if (a_fEffort <= 69.5f) + { + 
m_boolDone = true; + } + break; + + case 8: + Block4x4Encoding_ETC1::TryDegenerates2(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 9: + Block4x4Encoding_ETC1::TryDegenerates3(); + if (a_fEffort <= 89.5f) + { + m_boolDone = true; + } + break; + + case 10: + Block4x4Encoding_ETC1::TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in Planar mode + // save this encoding if it improves the error + // + void Block4x4Encoding_RGB8::TryPlanar(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + encodingTry.CalculatePlanarCornerColors(); + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (a_uiRadius > 0) + { + encodingTry.TwiddlePlanar(); + } + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode or H mode + // save this encoding if it improves the error + // + void Block4x4Encoding_RGB8::TryTAndH(unsigned int a_uiRadius) + { + + CalculateBaseColorsForTAndH(); + + TryT(a_uiRadius); + + if (!IsDone()) + { + TryH(a_uiRadius); + } + } + + // 
---------------------------------------------------------------------------------------------------- + // calculate original values for base colors + // store them in m_frgbaOriginalColor1 and m_frgbaOriginalColor2 + // + void Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(void) + { + + //bool boolRGBX = m_pblockParent->GetImageSource()->GetErrorMetric() == ErrorMetric::RGBX; + + ColorFloatRGBA frgbaBlockAverage = (m_frgbaSourceAverageLeft + m_frgbaSourceAverageRight) * 0.5f; + + // find pixel farthest from average gray line + unsigned int uiFarthestPixel = 0; + float fFarthestGrayDistance2 = 0.0f; + //unsigned int uiTransparentPixels = 0; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // don't count transparent +// if (m_pafrgbaSource[uiPixel].fA == 0.0f && !boolRGBX) +// { +// uiTransparentPixels++; +// } +// else + { + float fGrayDistance2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], frgbaBlockAverage); + + if (fGrayDistance2 > fFarthestGrayDistance2) + { + uiFarthestPixel = uiPixel; + fFarthestGrayDistance2 = fGrayDistance2; + } + } + } + // a transparent block should not reach this method + //assert(uiTransparentPixels < PIXELS); + + // set the original base colors to: + // half way to the farthest pixel and + // the mirror color on the other side of the average + ColorFloatRGBA frgbaOffset = (m_pafrgbaSource[uiFarthestPixel] - frgbaBlockAverage) * 0.5f; + m_frgbaOriginalColor1_TAndH = (frgbaBlockAverage + frgbaOffset).QuantizeR4G4B4(); + m_frgbaOriginalColor2_TAndH = (frgbaBlockAverage - frgbaOffset).ClampRGB().QuantizeR4G4B4(); // the "other side" might be out of range + + // move base colors to find best fit + for (unsigned int uiIteration = 0; uiIteration < 10; uiIteration++) + { + // find the center of pixels closest to each color + float fPixelsCloserToColor1 = 0.0f; + ColorFloatRGBA frgbSumPixelsCloserToColor1; + float fPixelsCloserToColor2 = 0.0f; + ColorFloatRGBA frgbSumPixelsCloserToColor2; + for (unsigned int uiPixel = 0; 
uiPixel < PIXELS; uiPixel++) + { + // don't count transparent pixels + // Can't do this, use premul to weight the colors before they are encoded + float alpha = 1.0f; // m_pafrgbaSource[uiPixel].fA; +// if (alpha == 0.0f) +// { +// continue; +// } + + float fGrayDistance2ToColor1 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor1_TAndH); + float fGrayDistance2ToColor2 = CalcGrayDistance2(m_pafrgbaSource[uiPixel], m_frgbaOriginalColor2_TAndH); + + ColorFloatRGBA frgbaAlphaWeightedSource = m_pafrgbaSource[uiPixel] * alpha; + frgbaAlphaWeightedSource.fA = 1.0f; + + if (fGrayDistance2ToColor1 <= fGrayDistance2ToColor2) + { + fPixelsCloserToColor1 += alpha; + frgbSumPixelsCloserToColor1 = frgbSumPixelsCloserToColor1 + frgbaAlphaWeightedSource; + } + else + { + fPixelsCloserToColor2 += alpha; + frgbSumPixelsCloserToColor2 = frgbSumPixelsCloserToColor2 + frgbaAlphaWeightedSource; + } + } + if (fPixelsCloserToColor1 == 0.0f || fPixelsCloserToColor2 == 0.0f) + { + break; + } + + // this doesn't scale alpha + ColorFloatRGBA frgbAvgColor1Pixels = (frgbSumPixelsCloserToColor1 * (1.0f / fPixelsCloserToColor1)).QuantizeR4G4B4(); + ColorFloatRGBA frgbAvgColor2Pixels = (frgbSumPixelsCloserToColor2 * (1.0f / fPixelsCloserToColor2)).QuantizeR4G4B4(); + + frgbAvgColor1Pixels.fA = 1.0f; + frgbAvgColor2Pixels.fA = 1.0f; + + if (frgbAvgColor1Pixels.fR == m_frgbaOriginalColor1_TAndH.fR && + frgbAvgColor1Pixels.fG == m_frgbaOriginalColor1_TAndH.fG && + frgbAvgColor1Pixels.fB == m_frgbaOriginalColor1_TAndH.fB && + + frgbAvgColor2Pixels.fR == m_frgbaOriginalColor2_TAndH.fR && + frgbAvgColor2Pixels.fG == m_frgbaOriginalColor2_TAndH.fG && + frgbAvgColor2Pixels.fB == m_frgbaOriginalColor2_TAndH.fB) + { + break; + } + + m_frgbaOriginalColor1_TAndH = frgbAvgColor1Pixels; + m_frgbaOriginalColor2_TAndH = frgbAvgColor2Pixels; + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save 
this encoding if it improves the error + // + // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently + // better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower + // + void Block4x4Encoding_RGB8::TryT(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_T; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + + if (iMaxRed1 > 15) + { + iMaxRed1 = 15; + } + if (iMaxGreen1 > 15) + { + iMaxGreen1 = 15; + } + if (iMaxBlue1 > 15) + { + iMaxBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + + if (iMaxRed2 > 15) + { + iMaxRed2 = 15; + } + if (iMaxGreen2 
> 15) + { + iMaxGreen2 = 15; + } + if (iMaxBlue2 > 15) + { + iMaxBlue2 = 15; + } + + bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor2_TAndH + // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector + // + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + if (isGray && (iRed2 != iGreen2 || iRed2 != iBlue2)) + { + continue; + } + + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + } + else + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + SetDoneIfPerfect(); + if (IsDone()) + { + return; + } + } + } + } + } + } + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 
= iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + if (isGray && (iRed1 != iGreen1 || iRed1 != iBlue1)) + { + continue; + } + + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + } + else + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + SetDoneIfPerfect(); + if (IsDone()) + { + return; + } + } + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryT + // called on an encodingTry + // + void Block4x4Encoding_RGB8::TryT_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA 
afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = m_frgbaColor1; + afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = m_frgbaColor2; + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], + uiPixel); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (fBlockError < m_fError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save this encoding if it improves the error + // + // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently + // TWIDDLE_RADIUS of 2 is WAY too slow + // + void Block4x4Encoding_RGB8::TryH(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_H; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = 
m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + if (iMaxRed1 > 15) + { + iMaxRed1 = 15; + } + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + if (iMaxGreen1 > 15) + { + iMaxGreen1 = 15; + } + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + if (iMaxBlue1 > 15) + { + iMaxBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + if (iMaxRed2 > 15) + { + iMaxRed2 = 15; + } + + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + if (iMaxGreen2 > 15) + { + iMaxGreen2 = 15; + } + + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + if (iMaxBlue2 > 15) + { + iMaxBlue2 = 15; + } + + bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + // gray only iterates red + if (isGray && (iRed1 != iGreen1 || iRed1 != iBlue1)) + { + continue; + } + + encodingTry.m_frgbaColor1 = 
ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + SetDoneIfPerfect(); + if (IsDone()) + { + return; + } + } + } + } + } + + // twiddle m_frgbaOriginalColor2_TAndH + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + // gray only iterates red + if (isGray && (iRed2 != iGreen2 || iRed2 != iBlue2)) + { + continue; + } + + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = 
encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + SetDoneIfPerfect(); + if (IsDone()) + { + return; + } + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryH + // called on an encodingTry + // + void Block4x4Encoding_RGB8::TryH_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); + afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = (m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], + uiPixel); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (m_fError > fBlockError) + { + 
m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // use linear regression to find the best fit for colors along the edges of the 4x4 block + // + void Block4x4Encoding_RGB8::CalculatePlanarCornerColors(void) + { + ColorFloatRGBA afrgbaRegression[MAX_PLANAR_REGRESSION_SIZE]; + ColorFloatRGBA frgbaSlope; + ColorFloatRGBA frgbaOffset; + + // top edge + afrgbaRegression[0] = m_pafrgbaSource[0]; + afrgbaRegression[1] = m_pafrgbaSource[4]; + afrgbaRegression[2] = m_pafrgbaSource[8]; + afrgbaRegression[3] = m_pafrgbaSource[12]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor1 = frgbaOffset; + m_frgbaColor2 = (frgbaSlope * 4.0f) + frgbaOffset; + + // left edge + afrgbaRegression[0] = m_pafrgbaSource[0]; + afrgbaRegression[1] = m_pafrgbaSource[1]; + afrgbaRegression[2] = m_pafrgbaSource[2]; + afrgbaRegression[3] = m_pafrgbaSource[3]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor1 = (m_frgbaColor1 + frgbaOffset) * 0.5f; // average with top edge + m_frgbaColor3 = (frgbaSlope * 4.0f) + frgbaOffset; + + // right edge + afrgbaRegression[0] = m_pafrgbaSource[12]; + afrgbaRegression[1] = m_pafrgbaSource[13]; + afrgbaRegression[2] = m_pafrgbaSource[14]; + afrgbaRegression[3] = m_pafrgbaSource[15]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor2 = (m_frgbaColor2 + frgbaOffset) * 0.5f; // average with top edge + + // bottom edge + afrgbaRegression[0] = m_pafrgbaSource[3]; + afrgbaRegression[1] = m_pafrgbaSource[7]; + afrgbaRegression[2] = m_pafrgbaSource[11]; + afrgbaRegression[3] = m_pafrgbaSource[15]; + ColorRegression(afrgbaRegression, 4, &frgbaSlope, &frgbaOffset); + m_frgbaColor3 = 
(m_frgbaColor3 + frgbaOffset) * 0.5f; // average with left edge + + // quantize corner colors to 6/7/6 + m_frgbaColor1 = m_frgbaColor1.QuantizeR6G7B6(); + m_frgbaColor2 = m_frgbaColor2.QuantizeR6G7B6(); + m_frgbaColor3 = m_frgbaColor3.QuantizeR6G7B6(); + + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing R, G and B independently + // + // R, G and B decoding and errors are independent, so R, G and B twiddles can be independent + // + // return true if improvement + // + bool Block4x4Encoding_RGB8::TwiddlePlanar(void) + { + bool boolImprovement = false; + bool isGray = m_errormetric == GRAY || !m_pblockParent->HasColorPixels(); + + while (TwiddlePlanarR()) + { + boolImprovement = true; + } + + if (!isGray) { + while (TwiddlePlanarG()) + { + boolImprovement = true; + } + + while (TwiddlePlanarB()) + { + boolImprovement = true; + } + } + + return boolImprovement; + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing R + // + bool Block4x4Encoding_RGB8::TwiddlePlanarR() + { + bool boolImprovement = false; + + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + int iOriginRed = encodingTry.m_frgbaColor1.IntRed(63.0f); + int iHorizRed = encodingTry.m_frgbaColor2.IntRed(63.0f); + int iVertRed = encodingTry.m_frgbaColor3.IntRed(63.0f); + + for (int iTryOriginRed = iOriginRed - 1; iTryOriginRed <= iOriginRed + 1; iTryOriginRed++) + { + // check for out of range + if (iTryOriginRed < 0 || iTryOriginRed > 63) + { + continue; + } + + encodingTry.m_frgbaColor1.fR = ((iTryOriginRed << 2) + (iTryOriginRed >> 4)) / 255.0f; + + for (int iTryHorizRed = iHorizRed - 1; iTryHorizRed <= iHorizRed + 1; iTryHorizRed++) + { + // check for out of 
range + if (iTryHorizRed < 0 || iTryHorizRed > 63) + { + continue; + } + + encodingTry.m_frgbaColor2.fR = ((iTryHorizRed << 2) + (iTryHorizRed >> 4)) / 255.0f; + + for (int iTryVertRed = iVertRed - 1; iTryVertRed <= iVertRed + 1; iTryVertRed++) + { + // check for out of range + if (iTryVertRed < 0 || iTryVertRed > 63) + { + continue; + } + + // don't bother with null twiddle + if (iTryOriginRed == iOriginRed && iTryHorizRed == iHorizRed && iTryVertRed == iVertRed) + { + continue; + } + + encodingTry.m_frgbaColor3.fR = ((iTryVertRed << 2) + (iTryVertRed >> 4)) / 255.0f; + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + boolImprovement = true; + } + } + } + } + + return boolImprovement; + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing G + // + bool Block4x4Encoding_RGB8::TwiddlePlanarG() + { + bool boolImprovement = false; + + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + int iOriginGreen = encodingTry.m_frgbaColor1.IntGreen(127.0f); + int iHorizGreen = encodingTry.m_frgbaColor2.IntGreen(127.0f); + int iVertGreen = encodingTry.m_frgbaColor3.IntGreen(127.0f); + + for (int iTryOriginGreen = iOriginGreen - 1; iTryOriginGreen <= iOriginGreen + 1; iTryOriginGreen++) + { + // check for out of range + if (iTryOriginGreen < 0 || iTryOriginGreen > 127) + { + continue; + } + + 
encodingTry.m_frgbaColor1.fG = ((iTryOriginGreen << 1) + (iTryOriginGreen >> 6)) / 255.0f; + + for (int iTryHorizGreen = iHorizGreen - 1; iTryHorizGreen <= iHorizGreen + 1; iTryHorizGreen++) + { + // check for out of range + if (iTryHorizGreen < 0 || iTryHorizGreen > 127) + { + continue; + } + + encodingTry.m_frgbaColor2.fG = ((iTryHorizGreen << 1) + (iTryHorizGreen >> 6)) / 255.0f; + + for (int iTryVertGreen = iVertGreen - 1; iTryVertGreen <= iVertGreen + 1; iTryVertGreen++) + { + // check for out of range + if (iTryVertGreen < 0 || iTryVertGreen > 127) + { + continue; + } + + // don't bother with null twiddle + if (iTryOriginGreen == iOriginGreen && + iTryHorizGreen == iHorizGreen && + iTryVertGreen == iVertGreen) + { + continue; + } + + encodingTry.m_frgbaColor3.fG = ((iTryVertGreen << 1) + (iTryVertGreen >> 6)) / 255.0f; + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + boolImprovement = true; + } + } + } + } + + return boolImprovement; + } + + // ---------------------------------------------------------------------------------------------------- + // try different corner colors by slightly changing B + // + bool Block4x4Encoding_RGB8::TwiddlePlanarB() + { + bool boolImprovement = false; + + Block4x4Encoding_RGB8 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_PLANAR; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + } + + int iOriginBlue = encodingTry.m_frgbaColor1.IntBlue(63.0f); + int iHorizBlue = encodingTry.m_frgbaColor2.IntBlue(63.0f); + int iVertBlue = 
encodingTry.m_frgbaColor3.IntBlue(63.0f); + + for (int iTryOriginBlue = iOriginBlue - 1; iTryOriginBlue <= iOriginBlue + 1; iTryOriginBlue++) + { + // check for out of range + if (iTryOriginBlue < 0 || iTryOriginBlue > 63) + { + continue; + } + + encodingTry.m_frgbaColor1.fB = ((iTryOriginBlue << 2) + (iTryOriginBlue >> 4)) / 255.0f; + + for (int iTryHorizBlue = iHorizBlue - 1; iTryHorizBlue <= iHorizBlue + 1; iTryHorizBlue++) + { + // check for out of range + if (iTryHorizBlue < 0 || iTryHorizBlue > 63) + { + continue; + } + + encodingTry.m_frgbaColor2.fB = ((iTryHorizBlue << 2) + (iTryHorizBlue >> 4)) / 255.0f; + + for (int iTryVertBlue = iVertBlue - 1; iTryVertBlue <= iVertBlue + 1; iTryVertBlue++) + { + // check for out of range + if (iTryVertBlue < 0 || iTryVertBlue > 63) + { + continue; + } + + // don't bother with null twiddle + if (iTryOriginBlue == iOriginBlue && iTryHorizBlue == iHorizBlue && iTryVertBlue == iVertBlue) + { + continue; + } + + encodingTry.m_frgbaColor3.fB = ((iTryVertBlue << 2) + (iTryVertBlue >> 4)) / 255.0f; + + encodingTry.DecodePixels_Planar(); + + encodingTry.CalcBlockError(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_PLANAR; + m_boolDiff = true; + m_boolFlip = false; + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_frgbaColor3 = encodingTry.m_frgbaColor3; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + + boolImprovement = true; + } + } + } + } + + return boolImprovement; + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_RGB8::SetEncodingBits(void) + { + + switch (m_mode) + { + case MODE_ETC1: + Block4x4Encoding_ETC1::SetEncodingBits(); + break; + + case MODE_T: + SetEncodingBits_T(); + break; 
+ + case MODE_H: + SetEncodingBits_H(); + break; + + case MODE_PLANAR: + SetEncodingBits_Planar(); + break; + + default: + assert(false); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state for T mode + // + void Block4x4Encoding_RGB8::SetEncodingBits_T(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_T); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2; + m_pencodingbitsRGB8->t.red1b = uiRed1; + m_pencodingbitsRGB8->t.green1 = uiGreen1; + m_pencodingbitsRGB8->t.blue1 = uiBlue1; + + m_pencodingbitsRGB8->t.red2 = uiRed2; + m_pencodingbitsRGB8->t.green2 = uiGreen2; + m_pencodingbitsRGB8->t.blue2 = uiBlue2; + + m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1; + m_pencodingbitsRGB8->t.db = m_uiCW1; + + m_pencodingbitsRGB8->t.diff = 1; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + if (iRed2 >= 4) + { + m_pencodingbitsRGB8->t.detect1 = 7; + m_pencodingbitsRGB8->t.detect2 = 0; + } + else + { + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + + // make sure red overflows + assert(iRed2 < 0 || 
iRed2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state for H mode + // + // colors and selectors may need to swap in order to generate lsb of distance index + // + void Block4x4Encoding_RGB8::SetEncodingBits_H(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_H); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; + unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; + + bool boolOddDistance = m_uiCW1 & 1; + bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance; + + if (boolSwapColors) + { + m_pencodingbitsRGB8->h.red1 = uiRed2; + m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen2; + m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue2; + + m_pencodingbitsRGB8->h.red2 = uiRed1; + m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen1; + m_pencodingbitsRGB8->h.blue2 = uiBlue1; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; + } + else + { + m_pencodingbitsRGB8->h.red1 = uiRed1; + m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen1; + m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue1; + + 
m_pencodingbitsRGB8->h.red2 = uiRed2; + m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen2; + m_pencodingbitsRGB8->h.blue2 = uiBlue2; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; + } + + m_pencodingbitsRGB8->h.diff = 1; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + if (boolSwapColors) + { + m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF; + } + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->h.detect1 = 0; + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + if (iRed2 < 0 || iRed2 > 31) + { + m_pencodingbitsRGB8->h.detect1 = 1; + } + if (iGreen2 >= 4) + { + m_pencodingbitsRGB8->h.detect2 = 7; + m_pencodingbitsRGB8->h.detect3 = 0; + } + else + { + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + + // make sure red doesn't overflow and green does + assert(iRed2 >= 0 && iRed2 <= 31); + assert(iGreen2 < 0 || iGreen2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state for Planar mode + // + void Block4x4Encoding_RGB8::SetEncodingBits_Planar(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_PLANAR); + assert(m_boolDiff == true); + + unsigned int uiOriginRed = (unsigned int)m_frgbaColor1.IntRed(63.0f); + unsigned int uiOriginGreen = (unsigned int)m_frgbaColor1.IntGreen(127.0f); + unsigned int 
uiOriginBlue = (unsigned int)m_frgbaColor1.IntBlue(63.0f); + + unsigned int uiHorizRed = (unsigned int)m_frgbaColor2.IntRed(63.0f); + unsigned int uiHorizGreen = (unsigned int)m_frgbaColor2.IntGreen(127.0f); + unsigned int uiHorizBlue = (unsigned int)m_frgbaColor2.IntBlue(63.0f); + + unsigned int uiVertRed = (unsigned int)m_frgbaColor3.IntRed(63.0f); + unsigned int uiVertGreen = (unsigned int)m_frgbaColor3.IntGreen(127.0f); + unsigned int uiVertBlue = (unsigned int)m_frgbaColor3.IntBlue(63.0f); + + m_pencodingbitsRGB8->planar.originRed = uiOriginRed; + m_pencodingbitsRGB8->planar.originGreen1 = uiOriginGreen >> 6; + m_pencodingbitsRGB8->planar.originGreen2 = uiOriginGreen; + m_pencodingbitsRGB8->planar.originBlue1 = uiOriginBlue >> 5; + m_pencodingbitsRGB8->planar.originBlue2 = uiOriginBlue >> 3; + m_pencodingbitsRGB8->planar.originBlue3 = uiOriginBlue >> 1; + m_pencodingbitsRGB8->planar.originBlue4 = uiOriginBlue; + + m_pencodingbitsRGB8->planar.horizRed1 = uiHorizRed >> 1; + m_pencodingbitsRGB8->planar.horizRed2 = uiHorizRed; + m_pencodingbitsRGB8->planar.horizGreen = uiHorizGreen; + m_pencodingbitsRGB8->planar.horizBlue1 = uiHorizBlue >> 5; + m_pencodingbitsRGB8->planar.horizBlue2 = uiHorizBlue; + + m_pencodingbitsRGB8->planar.vertRed1 = uiVertRed >> 3; + m_pencodingbitsRGB8->planar.vertRed2 = uiVertRed; + m_pencodingbitsRGB8->planar.vertGreen1 = uiVertGreen >> 2; + m_pencodingbitsRGB8->planar.vertGreen2 = uiVertGreen; + m_pencodingbitsRGB8->planar.vertBlue = uiVertBlue; + + m_pencodingbitsRGB8->planar.diff = 1; + + // create valid RG differentials and an invalid B differential to trigger planar mode + m_pencodingbitsRGB8->planar.detect1 = 0; + m_pencodingbitsRGB8->planar.detect2 = 0; + m_pencodingbitsRGB8->planar.detect3 = 0; + m_pencodingbitsRGB8->planar.detect4 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + 
(int)m_pencodingbitsRGB8->differential.dgreen2; + int iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2; + if (iRed2 < 0 || iRed2 > 31) + { + m_pencodingbitsRGB8->planar.detect1 = 1; + } + if (iGreen2 < 0 || iGreen2 > 31) + { + m_pencodingbitsRGB8->planar.detect2 = 1; + } + if (iBlue2 >= 4) + { + m_pencodingbitsRGB8->planar.detect3 = 7; + m_pencodingbitsRGB8->planar.detect4 = 0; + } + else + { + m_pencodingbitsRGB8->planar.detect3 = 0; + m_pencodingbitsRGB8->planar.detect4 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + iBlue2 = (int)m_pencodingbitsRGB8->differential.blue1 + (int)m_pencodingbitsRGB8->differential.dblue2; + + // make sure red and green don't overflow and blue does + assert(iRed2 >= 0 && iRed2 <= 31); + assert(iGreen2 >= 0 && iGreen2 <= 31); + assert(iBlue2 < 0 || iBlue2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the decoded colors and decoded alpha based on the encoding state for T mode + // + void Block4x4Encoding_RGB8::DecodePixels_T(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + switch (m_auiSelectors[uiPixel]) + { + case 0: + m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1; + break; + + case 1: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); + break; + + case 2: + m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2; + break; + + case 3: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); + break; + } + + } + + } + + // 
---------------------------------------------------------------------------------------------------- + // set the decoded colors and decoded alpha based on the encoding state for H mode + // + void Block4x4Encoding_RGB8::DecodePixels_H(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + switch (m_auiSelectors[uiPixel]) + { + case 0: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB(); + break; + + case 1: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB(); + break; + + case 2: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); + break; + + case 3: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); + break; + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the decoded colors and decoded alpha based on the encoding state for Planar mode + // + void Block4x4Encoding_RGB8::DecodePixels_Planar(void) + { + + int iRO = m_frgbaColor1.IntRed(255.0f); + int iGO = m_frgbaColor1.IntGreen(255.0f); + int iBO = m_frgbaColor1.IntBlue(255.0f); + + int iRH = m_frgbaColor2.IntRed(255.0f); + int iGH = m_frgbaColor2.IntGreen(255.0f); + int iBH = m_frgbaColor2.IntBlue(255.0f); + + int iRV = m_frgbaColor3.IntRed(255.0f); + int iGV = m_frgbaColor3.IntGreen(255.0f); + int iBV = m_frgbaColor3.IntBlue(255.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int iX = (int)(uiPixel >> 2); + int iY = (int)(uiPixel & 3); + + int iR = (iX*(iRH - iRO) + iY*(iRV - iRO) + 4*iRO + 2) >> 2; + int iG = (iX*(iGH - iGO) + iY*(iGV - iGO) + 4*iGO + 2) >> 2; + int iB = (iX*(iBH - iBO) + iY*(iBV - iBO) + 4*iBO + 2) >> 2; + + ColorFloatRGBA frgba; + frgba.fR = (float)iR / 255.0f; + frgba.fG = (float)iG / 255.0f; + frgba.fB = (float)iB / 255.0f; + 
frgba.fA = 1.0f; + + m_afrgbaDecodedColors[uiPixel] = frgba.ClampRGB(); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a linear regression for the a_uiPixels in a_pafrgbaPixels[] + // + // output the closest color line using a_pfrgbaSlope and a_pfrgbaOffset + // + void Block4x4Encoding_RGB8::ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, + ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset) + { + typedef struct + { + float f[4]; + } Float4; + + Float4 *paf4Pixels = (Float4 *)(a_pafrgbaPixels); + Float4 *pf4Slope = (Float4 *)(a_pfrgbaSlope); + Float4 *pf4Offset = (Float4 *)(a_pfrgbaOffset); + + float afX[MAX_PLANAR_REGRESSION_SIZE]; + float afY[MAX_PLANAR_REGRESSION_SIZE]; + + // handle r, g and b separately. don't bother with a + for (unsigned int uiComponent = 0; uiComponent < 3; uiComponent++) + { + for (unsigned int uiPixel = 0; uiPixel < a_uiPixels; uiPixel++) + { + afX[uiPixel] = (float)uiPixel; + afY[uiPixel] = paf4Pixels[uiPixel].f[uiComponent]; + + } + Etc::Regression(afX, afY, a_uiPixels, + &(pf4Slope->f[uiComponent]), &(pf4Offset->f[uiComponent])); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.h b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.h index c4d6c3e9..f49222c7 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8.h @@ -1,97 +1,97 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "EtcBlock4x4Encoding_ETC1.h" - -namespace Etc -{ - - class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1 - { - public: - - Block4x4Encoding_RGB8(void); - virtual ~Block4x4Encoding_RGB8(void); - - virtual void Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - - ErrorMetric a_errormetric, - uint16_t iterationCount); - - virtual void PerformIteration(float a_fEffort); - - virtual void SetEncodingBits(void); - -// inline ColorFloatRGBA GetColor3(void) const -// { -// return m_frgbaColor3; -// } - - protected: - - static const unsigned int PLANAR_CORNER_COLORS = 3; - static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4; - static const unsigned int TH_DISTANCES = 8; - - static float s_afTHDistanceTable[TH_DISTANCES]; - - void TryPlanar(unsigned int a_uiRadius); - void TryTAndH(unsigned int a_uiRadius); - - void InitFromEncodingBits_Planar(void); - - ColorFloatRGBA m_frgbaColor3; // used for planar - - void SetEncodingBits_T(void); - void SetEncodingBits_H(void); - void SetEncodingBits_Planar(void); - - // state shared between iterations - ColorFloatRGBA m_frgbaOriginalColor1_TAndH; - ColorFloatRGBA m_frgbaOriginalColor2_TAndH; - - void CalculateBaseColorsForTAndH(void); - void TryT(unsigned int a_uiRadius); - void TryT_BestSelectorCombination(void); - void TryH(unsigned int a_uiRadius); - void TryH_BestSelectorCombination(void); - - protected: - - void InitFromEncodingBits_T(void); - void InitFromEncodingBits_H(void); - - void 
CalculatePlanarCornerColors(void); - - void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, - ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset); - - bool TwiddlePlanar(void); - bool TwiddlePlanarR(); - bool TwiddlePlanarG(); - bool TwiddlePlanarB(); - - void DecodePixels_T(void); - void DecodePixels_H(void); - void DecodePixels_Planar(void); - - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcBlock4x4Encoding_ETC1.h" + +namespace Etc +{ + + class Block4x4Encoding_RGB8 : public Block4x4Encoding_ETC1 + { + public: + + Block4x4Encoding_RGB8(void); + virtual ~Block4x4Encoding_RGB8(void); + + virtual void Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + + ErrorMetric a_errormetric, + uint16_t iterationCount); + + virtual void PerformIteration(float a_fEffort); + + virtual void SetEncodingBits(void); + +// inline ColorFloatRGBA GetColor3(void) const +// { +// return m_frgbaColor3; +// } + + protected: + + static const unsigned int PLANAR_CORNER_COLORS = 3; + static const unsigned int MAX_PLANAR_REGRESSION_SIZE = 4; + static const unsigned int TH_DISTANCES = 8; + + static float s_afTHDistanceTable[TH_DISTANCES]; + + void TryPlanar(unsigned int a_uiRadius); + void TryTAndH(unsigned int a_uiRadius); + + void InitFromEncodingBits_Planar(void); + + ColorFloatRGBA m_frgbaColor3; // used for planar + + void SetEncodingBits_T(void); + void SetEncodingBits_H(void); + void SetEncodingBits_Planar(void); + + // state shared between iterations + ColorFloatRGBA m_frgbaOriginalColor1_TAndH; + ColorFloatRGBA m_frgbaOriginalColor2_TAndH; + + void CalculateBaseColorsForTAndH(void); + void TryT(unsigned int a_uiRadius); + void TryT_BestSelectorCombination(void); + void TryH(unsigned int a_uiRadius); + void TryH_BestSelectorCombination(void); + + protected: + + void InitFromEncodingBits_T(void); + void InitFromEncodingBits_H(void); + + void CalculatePlanarCornerColors(void); + + void ColorRegression(ColorFloatRGBA *a_pafrgbaPixels, unsigned int a_uiPixels, + ColorFloatRGBA *a_pfrgbaSlope, ColorFloatRGBA *a_pfrgbaOffset); + + bool TwiddlePlanar(void); + bool TwiddlePlanarR(); + bool TwiddlePlanarG(); + bool TwiddlePlanarB(); + + void DecodePixels_T(void); + void DecodePixels_H(void); + void DecodePixels_Planar(void); + + }; + +} // namespace Etc diff --git 
a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp index f6d70264..41dfc0a4 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.cpp @@ -1,1829 +1,1829 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_RGB8A1.cpp contains: - Block4x4Encoding_RGB8A1 - Block4x4Encoding_RGB8A1_Opaque - Block4x4Encoding_RGB8A1_Transparent - -These encoders are used when targetting file format RGB8A1. 
- -Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque -Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent -Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_RGB8A1.h" - -#include "EtcBlock4x4.h" -#include "EtcBlock4x4EncodingBits.h" -#include "EtcBlock4x4Encoding_RGB8.h" - -#include -#include -#include - -namespace Etc -{ - - // #################################################################################################### - // Block4x4Encoding_RGB8A1 - // #################################################################################################### - - float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] = - { - { 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f }, - { 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f }, - { 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f }, - { 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f }, - { 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f }, - { 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f }, - { 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f }, - { 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f } - }; - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void) - { - m_pencodingbitsRGB8 = nullptr; - m_boolOpaque = false; - m_boolTransparent = false; - m_boolPunchThroughPixels = true; - - } - Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {} - // ---------------------------------------------------------------------------------------------------- - // initialization prior to encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the 
best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits - // - void Block4x4Encoding_RGB8A1::Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, - ErrorMetric a_errormetric) - { - - Block4x4Encoding_RGB8::Encode(a_pblockParent, - a_pafrgbaSource, - a_paucEncodingBits, - a_errormetric); - - m_boolOpaque = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE; - m_boolTransparent = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT; - m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels(); - -// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) -// { -// if (m_pafrgbaSource[uiPixel].fA >= 0.5f) -// { -// m_afDecodedAlphas[uiPixel] = 1.0f; -// } -// else -// { -// m_afDecodedAlphas[uiPixel] = 0.0f; -// } -// } - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits of a previous encoding - // - void Block4x4Encoding_RGB8A1::Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) - { - - - InitFromEncodingBits_ETC1(a_pblockParent, - a_paucEncodingBits, - a_pafrgbaSource, - a_errormetric, iterationCount); - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; - - // detect if there is a T, H or Planar mode present - int iRed1 = m_pencodingbitsRGB8->differential.red1; - int iDRed2 = m_pencodingbitsRGB8->differential.dred2; - int iRed2 = iRed1 + iDRed2; - - int 
iGreen1 = m_pencodingbitsRGB8->differential.green1; - int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2; - int iGreen2 = iGreen1 + iDGreen2; - - int iBlue1 = m_pencodingbitsRGB8->differential.blue1; - int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2; - int iBlue2 = iBlue1 + iDBlue2; - - if (iRed2 < 0 || iRed2 > 31) - { - InitFromEncodingBits_T(); - } - else if (iGreen2 < 0 || iGreen2 > 31) - { - InitFromEncodingBits_H(); - } - else if (iBlue2 < 0 || iBlue2 > 31) - { - Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode. - // if it isn't an ETC1 mode, this will be overwritten later - // - void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, uint16_t iterationCount) - { - Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, - a_errormetric, iterationCount); - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; - - m_mode = MODE_ETC1; - m_boolDiff = true; - m_boolFlip = m_pencodingbitsRGB8->differential.flip; - m_boolOpaque = m_pencodingbitsRGB8->differential.diff; - - int iR2 = m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2; - int iG2 = m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2; - int iB2 = m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2; - - if (iR2 < 0) - { - iR2 = 0; - } - else if (iR2 > 31) - { - iR2 = 31; - } - - if (iG2 < 0) - { - iG2 = 0; - } - else if (iG2 > 31) - { - iG2 = 31; - } - - if (iB2 < 0) - { - iB2 = 0; - } - else if (iB2 > 31) - { - iB2 = 31; - } - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, 
m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2); - - m_uiCW1 = m_pencodingbitsRGB8->differential.cw1; - m_uiCW2 = m_pencodingbitsRGB8->differential.cw2; - - Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); - - Decode_ETC1(); - - CalcBlockError(); - - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding if T mode is detected - // - void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void) - { - m_mode = MODE_T; - - unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) + - m_pencodingbitsRGB8->t.red1b); - unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1; - unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1; - - unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2; - unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2; - unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2; - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); - - m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db; - - Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); - - DecodePixels_T(); - - CalcBlockError(); - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding if H mode is detected - // - void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void) - { - m_mode = MODE_H; - - unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1; - unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) + - m_pencodingbitsRGB8->h.green1b); - unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) + - 
(m_pencodingbitsRGB8->h.blue1b << 1) + - m_pencodingbitsRGB8->h.blue1c); - - unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2; - unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) + - m_pencodingbitsRGB8->h.green2b); - unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2; - - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); - - // used to determine the LSB of the CW - unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1); - unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2); - - m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1); - if (uiRGB1 >= uiRGB2) - { - m_uiCW1++; - } - - Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); - - DecodePixels_H(); - - CalcBlockError(); - } - - // ---------------------------------------------------------------------------------------------------- - // for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state - // - void Block4x4Encoding_RGB8A1::Decode_ETC1(void) - { - - const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0; - - for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++) - { - ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2; - unsigned int uiCW = uiPixelOrder < 8 ? 
m_uiCW1 : m_uiCW2; - - unsigned int uiPixel = pauiPixelOrder[uiPixelOrder]; - - float fDelta; - if (m_boolOpaque) - fDelta = Block4x4Encoding_ETC1::s_aafCwTable[uiCW][m_auiSelectors[uiPixel]]; - else - fDelta = s_aafCwOpaqueUnsetTable[uiCW][m_auiSelectors[uiPixel]]; - - if (m_boolOpaque == false && m_auiSelectors[uiPixel] == TRANSPARENT_SELECTOR) - { - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel] = 0.0f; - } - else - { - m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - } - - // TODO: this isn't setting alpha - } - - } - - // ---------------------------------------------------------------------------------------------------- - // for T mode, set the decoded colors and decoded alpha based on the encoding state - // - void Block4x4Encoding_RGB8A1::DecodePixels_T(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - switch (m_auiSelectors[uiPixel]) - { - case 0: - m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1; - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - - case 1: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - - case 2: - if (m_boolOpaque == false) - { - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel] = 0.0f; - } - else - { - m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2; - //m_afDecodedAlphas[uiPixel] = 1.0f; - } - break; - - case 3: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // for H mode, set the decoded colors and decoded alpha based on the encoding state - // - void 
Block4x4Encoding_RGB8A1::DecodePixels_H(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - switch (m_auiSelectors[uiPixel]) - { - case 0: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - - case 1: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - - case 2: - if (m_boolOpaque == false) - { - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel] = 0.0f; - } - else - { - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - } - break; - - case 3: - m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); - //m_afDecodedAlphas[uiPixel] = 1.0f; - break; - } - - } - - } - - - // ---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort - // - // RGB8A1 can't use individual mode - // RGB8A1 with transparent pixels can't use planar mode - // - void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort) - { - if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE) - { - PerformIterationOpaque(a_fEffort); - return; - } - else if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT) - { - PerformIterationTransparent(a_fEffort); - return; - } - - assert(!m_boolOpaque); - assert(!m_boolTransparent); - assert(!m_boolDone); - - switch (m_uiEncodingIterations) - { - case 0: - PerformFirstIteration(); - break; - - case 1: - 
TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); - break; - - case 2: - TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); - if (a_fEffort <= 39.5f) - { - m_boolDone = true; - } - break; - - case 3: - Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(); - TryT(1); - TryH(1); - if (a_fEffort <= 49.5f) - { - m_boolDone = true; - } - break; - - case 4: - TryDegenerates1(); - if (a_fEffort <= 59.5f) - { - m_boolDone = true; - } - break; - - case 5: - TryDegenerates2(); - if (a_fEffort <= 69.5f) - { - m_boolDone = true; - } - break; - - case 6: - TryDegenerates3(); - if (a_fEffort <= 79.5f) - { - m_boolDone = true; - } - break; - - case 7: - TryDegenerates4(); - m_boolDone = true; - break; - - default: - assert(0); - break; - } - - m_uiEncodingIterations++; - - SetDoneIfPerfect(); - - } - - // ---------------------------------------------------------------------------------------------------- - // find best initial encoding to ensure block has a valid encoding - // - void Block4x4Encoding_RGB8A1::PerformFirstIteration(void) - { - Block4x4Encoding_ETC1::CalculateMostLikelyFlip(); - - m_fError = FLT_MAX; - - TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - - } - - // ---------------------------------------------------------------------------------------------------- - // mostly copied from ETC1 - // differences: - // Block4x4Encoding_RGB8A1 encodingTry = *this; - // - void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2) - { - - ColorFloatRGBA frgbaColor1; - ColorFloatRGBA frgbaColor2; - - const unsigned int *pauiPixelMapping1; - const unsigned int *pauiPixelMapping2; - - if (a_boolFlip) - { - frgbaColor1 = m_frgbaSourceAverageTop; - frgbaColor2 = m_frgbaSourceAverageBottom; - - pauiPixelMapping1 = s_auiTopPixelMapping; - pauiPixelMapping2 = s_auiBottomPixelMapping; - } 
- else - { - frgbaColor1 = m_frgbaSourceAverageLeft; - frgbaColor2 = m_frgbaSourceAverageRight; - - pauiPixelMapping1 = s_auiLeftPixelMapping; - pauiPixelMapping2 = s_auiRightPixelMapping; - } - - DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, - a_uiRadius, a_iGrayOffset1, a_iGrayOffset2); - - Block4x4Encoding_RGB8A1 encodingTry = *this; - encodingTry.m_boolFlip = a_boolFlip; - - encodingTry.TryDifferentialHalf(&trys.m_half1); - encodingTry.TryDifferentialHalf(&trys.m_half2); - - // find best halves that are within differential range - DifferentialTrys::Try *ptryBest1 = nullptr; - DifferentialTrys::Try *ptryBest2 = nullptr; - encodingTry.m_fError = FLT_MAX; - - // see if the best of each half are in differential range - int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed; - int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen; - int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue; - if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3) - { - ptryBest1 = trys.m_half1.m_ptryBest; - ptryBest2 = trys.m_half2.m_ptryBest; - encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; - } - else - { - // else, find the next best halves that are in differential range - for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0]; - ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys]; - ptry1++) - { - for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0]; - ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys]; - ptry2++) - { - iDRed = ptry2->m_iRed - ptry1->m_iRed; - bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4; - iDGreen = ptry2->m_iGreen - ptry1->m_iGreen; - bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4; - iDBlue = ptry2->m_iBlue - ptry1->m_iBlue; - bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4; - - if (boolValidRedDelta && boolValidGreenDelta && 
boolValidBlueDelta) - { - float fError = ptry1->m_fError + ptry2->m_fError; - - if (fError < encodingTry.m_fError) - { - encodingTry.m_fError = fError; - - ptryBest1 = ptry1; - ptryBest2 = ptry2; - } - } - - } - } - assert(encodingTry.m_fError < FLT_MAX); - assert(ptryBest1 != nullptr); - assert(ptryBest2 != nullptr); - } - - if (encodingTry.m_fError < m_fError) - { - m_mode = MODE_ETC1; - m_boolDiff = true; - m_boolFlip = encodingTry.m_boolFlip; - m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); - m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); - m_uiCW1 = ptryBest1->m_uiCW; - m_uiCW2 = ptryBest2->m_uiCW; - - m_fError = 0.0f; - for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) - { - unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; - unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; - - unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; - unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; - - m_auiSelectors[uiPixel1] = uiSelector1; - m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; - - float alpha1 = 1.0; - float alpha2 = 1.0; - - if (uiSelector1 == TRANSPARENT_SELECTOR) - { - m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel1] = 0.0f; - alpha1 = 0.0; - } - else - { - float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1]; - m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); - //m_afDecodedAlphas[uiPixel1] = 1.0f; - } - - if (uiSelector2 == TRANSPARENT_SELECTOR) - { - m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel2] = 0.0f; - alpha2 = 0.0; - } - else - { - float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2]; - m_afrgbaDecodedColors[uiPixel2] = 
(m_frgbaColor2 + fDeltaRGB2).ClampRGB(); - //m_afDecodedAlphas[uiPixel2] = 1.0f; - } - - float fDeltaA1 =alpha1 - m_pafrgbaSource[uiPixel1].fA; - m_fError += fDeltaA1 * fDeltaA1; - float fDeltaA2 = alpha2 - m_pafrgbaSource[uiPixel2].fA; - m_fError += fDeltaA2 * fDeltaA2; - } - - m_fError1 = ptryBest1->m_fError; - m_fError2 = ptryBest2->m_fError; - m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; - m_fError = m_fError1 + m_fError2; - - // sanity check - { - int iRed1 = m_frgbaColor1.IntRed(31.0f); - int iGreen1 = m_frgbaColor1.IntGreen(31.0f); - int iBlue1 = m_frgbaColor1.IntBlue(31.0f); - - int iRed2 = m_frgbaColor2.IntRed(31.0f); - int iGreen2 = m_frgbaColor2.IntGreen(31.0f); - int iBlue2 = m_frgbaColor2.IntBlue(31.0f); - - iDRed = iRed2 - iRed1; - iDGreen = iGreen2 - iGreen1; - iDBlue = iBlue2 - iBlue1; - - assert(iDRed >= -4 && iDRed < 4); - assert(iDGreen >= -4 && iDGreen < 4); - assert(iDBlue >= -4 && iDBlue < 4); - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // mostly copied from ETC1 - // differences: - // uses s_aafCwOpaqueUnsetTable - // color for selector set to 0,0,0,0 - // - void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) - { - - a_phalf->m_ptryBest = nullptr; - float fBestTryError = FLT_MAX; - - a_phalf->m_uiTrys = 0; - for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; - iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius; - iRed++) - { - assert(iRed >= 0 && iRed <= 31); - - for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius; - iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius; - iGreen++) - { - assert(iGreen >= 0 && iGreen <= 31); - - for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius; - iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius; - iBlue++) - { - assert(iBlue >= 0 && iBlue <= 31); - - DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; - assert(ptry < 
&a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); - - ptry->m_iRed = iRed; - ptry->m_iGreen = iGreen; - ptry->m_iBlue = iBlue; - ptry->m_fError = FLT_MAX; - ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue); - - // try each CW - for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) - { - unsigned int auiPixelSelectors[PIXELS / 2]; - ColorFloatRGBA afrgbaDecodedColors[PIXELS / 2]; - float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - - // pre-compute decoded pixels for each selector - ColorFloatRGBA afrgbaSelectors[SELECTORS]; - assert(SELECTORS == 4); - afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB(); - afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB(); - afrgbaSelectors[2] = ColorFloatRGBA(); - afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB(); - - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; - const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[srcPixelIndex]; - ColorFloatRGBA frgbaDecodedPixel; - - for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) - { - if (pfrgbaSourcePixel->fA < 0.5f) - { - uiSelector = TRANSPARENT_SELECTOR; - } - else if (uiSelector == TRANSPARENT_SELECTOR) - { - continue; - } - - frgbaDecodedPixel = afrgbaSelectors[uiSelector]; - - float fPixelError; - - fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); - - if (fPixelError < afPixelErrors[uiPixel]) - { - auiPixelSelectors[uiPixel] = uiSelector; - afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel; - afPixelErrors[uiPixel] = fPixelError; - } - - if (uiSelector == TRANSPARENT_SELECTOR) - { - break; - } - } - } - - // add up all pixel errors - float fCWError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - fCWError += 
afPixelErrors[uiPixel]; - } - - // if best CW so far - if (fCWError < ptry->m_fError) - { - ptry->m_uiCW = uiCW; - for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) - { - ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; - } - ptry->m_fError = fCWError; - } - - } - - if (ptry->m_fError < fBestTryError) - { - a_phalf->m_ptryBest = ptry; - fBestTryError = ptry->m_fError; - } - - assert(ptry->m_fError < FLT_MAX); - - a_phalf->m_uiTrys++; - } - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try encoding in T mode - // save this encoding if it improves the error - // - // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently - // better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower - // - void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius) - { - Block4x4Encoding_RGB8A1 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_T; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - encodingTry.m_fError = FLT_MAX; - } - - int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); - int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); - int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); - - int iMinRed1 = iColor1Red - (int)a_uiRadius; - if (iMinRed1 < 0) - { - iMinRed1 = 0; - } - int iMaxRed1 = iColor1Red + (int)a_uiRadius; - if (iMaxRed1 > 15) - { - iMinRed1 = 15; - } - - int iMinGreen1 = iColor1Green - (int)a_uiRadius; - if (iMinGreen1 < 0) - { - iMinGreen1 = 0; - } - int iMaxGreen1 = iColor1Green + (int)a_uiRadius; - if (iMaxGreen1 > 15) - { - iMinGreen1 = 15; - } - - int iMinBlue1 = iColor1Blue - (int)a_uiRadius; - if (iMinBlue1 < 0) - { - iMinBlue1 = 0; - } - int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; - if (iMaxBlue1 > 15) - { - iMinBlue1 = 15; - } - - int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); - int 
iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); - int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); - - int iMinRed2 = iColor2Red - (int)a_uiRadius; - if (iMinRed2 < 0) - { - iMinRed2 = 0; - } - int iMaxRed2 = iColor2Red + (int)a_uiRadius; - if (iMaxRed2 > 15) - { - iMinRed2 = 15; - } - - int iMinGreen2 = iColor2Green - (int)a_uiRadius; - if (iMinGreen2 < 0) - { - iMinGreen2 = 0; - } - int iMaxGreen2 = iColor2Green + (int)a_uiRadius; - if (iMaxGreen2 > 15) - { - iMinGreen2 = 15; - } - - int iMinBlue2 = iColor2Blue - (int)a_uiRadius; - if (iMinBlue2 < 0) - { - iMinBlue2 = 0; - } - int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; - if (iMaxBlue2 > 15) - { - iMinBlue2 = 15; - } - - for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) - { - encodingTry.m_uiCW1 = uiDistance; - - // twiddle m_frgbaOriginalColor2_TAndH - // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector - // - for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) - { - for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) - { - for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) - { - for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) - { - if (uiBaseColorSwaps == 0) - { - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - } - else - { - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; - } - - encodingTry.TryT_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = 
encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - } - } - } - } - } - - // twiddle m_frgbaOriginalColor1_TAndH - for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) - { - for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) - { - for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) - { - for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) - { - if (uiBaseColorSwaps == 0) - { - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; - } - else - { - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - } - - encodingTry.TryT_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - } - } - } - } - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // find best selector combination for TryT - // called on an encodingTry - // - void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void) - { - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - - unsigned int 
auiBestPixelSelectors[PIXELS]; - float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; - ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; - - assert(SELECTORS == 4); - afrgbaDecodedPixel[0] = m_frgbaColor1; - afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB(); - afrgbaDecodedPixel[2] = ColorFloatRGBA(); - afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); - - // try each selector - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - unsigned int uiMinSelector = 0; - unsigned int uiMaxSelector = SELECTORS - 1; - - if (m_pafrgbaSource[uiPixel].fA < 0.5f) - { - uiMinSelector = 2; - uiMaxSelector = 2; - } - - for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) - { - float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], uiPixel); - - if (fPixelError < afBestPixelErrors[uiPixel]) - { - afBestPixelErrors[uiPixel] = fPixelError; - auiBestPixelSelectors[uiPixel] = uiSelector; - afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; - } - } - } - - - // add up all of the pixel errors - float fBlockError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - fBlockError += afBestPixelErrors[uiPixel]; - } - - if (m_fError > fBlockError) - { - m_fError = fBlockError; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try encoding in H mode - // save this encoding if it improves the error - // - // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently - // TWIDDLE_RADIUS of 2 is 
WAY too slow - // - void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius) - { - Block4x4Encoding_RGB8A1 encodingTry = *this; - - // init "try" - { - encodingTry.m_mode = MODE_H; - encodingTry.m_boolDiff = true; - encodingTry.m_boolFlip = false; - encodingTry.m_fError = FLT_MAX; - } - - int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); - int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); - int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); - - int iMinRed1 = iColor1Red - (int)a_uiRadius; - if (iMinRed1 < 0) - { - iMinRed1 = 0; - } - int iMaxRed1 = iColor1Red + (int)a_uiRadius; - if (iMaxRed1 > 15) - { - iMinRed1 = 15; - } - - int iMinGreen1 = iColor1Green - (int)a_uiRadius; - if (iMinGreen1 < 0) - { - iMinGreen1 = 0; - } - int iMaxGreen1 = iColor1Green + (int)a_uiRadius; - if (iMaxGreen1 > 15) - { - iMinGreen1 = 15; - } - - int iMinBlue1 = iColor1Blue - (int)a_uiRadius; - if (iMinBlue1 < 0) - { - iMinBlue1 = 0; - } - int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; - if (iMaxBlue1 > 15) - { - iMinBlue1 = 15; - } - - int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); - int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); - int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); - - int iMinRed2 = iColor2Red - (int)a_uiRadius; - if (iMinRed2 < 0) - { - iMinRed2 = 0; - } - int iMaxRed2 = iColor2Red + (int)a_uiRadius; - if (iMaxRed2 > 15) - { - iMinRed2 = 15; - } - - int iMinGreen2 = iColor2Green - (int)a_uiRadius; - if (iMinGreen2 < 0) - { - iMinGreen2 = 0; - } - int iMaxGreen2 = iColor2Green + (int)a_uiRadius; - if (iMaxGreen2 > 15) - { - iMinGreen2 = 15; - } - - int iMinBlue2 = iColor2Blue - (int)a_uiRadius; - if (iMinBlue2 < 0) - { - iMinBlue2 = 0; - } - int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; - if (iMaxBlue2 > 15) - { - iMinBlue2 = 15; - } - - for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) - { - encodingTry.m_uiCW1 = uiDistance; - - // twiddle 
m_frgbaOriginalColor1_TAndH - for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) - { - for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) - { - for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) - { - encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); - encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; - - // if color1 == color2, H encoding issues can pop up, so abort - if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) - { - continue; - } - - encodingTry.TryH_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - } - } - } - } - - // twiddle m_frgbaOriginalColor2_TAndH - for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) - { - for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) - { - for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) - { - encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; - encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); - - // if color1 == color2, H encoding issues can pop up, so abort - if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) - { - continue; - } - - encodingTry.TryH_BestSelectorCombination(); - - if (encodingTry.m_fError < m_fError) - { - m_mode = encodingTry.m_mode; - m_boolDiff = encodingTry.m_boolDiff; - m_boolFlip = encodingTry.m_boolFlip; - - m_frgbaColor1 = 
encodingTry.m_frgbaColor1; - m_frgbaColor2 = encodingTry.m_frgbaColor2; - m_uiCW1 = encodingTry.m_uiCW1; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; - } - - m_fError = encodingTry.m_fError; - } - } - } - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // find best selector combination for TryH - // called on an encodingTry - // - void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void) - { - - // abort if colors and CW will pose an encoding problem - { - unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f); - unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f); - unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f); - unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; - - unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f); - unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f); - unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f); - unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; - - unsigned int uiCWLsb = m_uiCW1 & 1; - - if ((uiColorValue1 >= (uiColorValue2 & uiCWLsb)) == 0 || - (uiColorValue1 < (uiColorValue2 & uiCWLsb)) == 1) - { - return; - } - } - - float fDistance = s_afTHDistanceTable[m_uiCW1]; - - unsigned int auiBestPixelSelectors[PIXELS]; - float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, - FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; - ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; - ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; - - assert(SELECTORS == 4); - afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); - afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); 
- afrgbaDecodedPixel[2] = ColorFloatRGBA();; - afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); - - - // try each selector - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - unsigned int uiMinSelector = 0; - unsigned int uiMaxSelector = SELECTORS - 1; - - if (m_pafrgbaSource[uiPixel].fA < 0.5f) - { - uiMinSelector = 2; - uiMaxSelector = 2; - } - - for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) - { - float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], uiPixel); - - if (fPixelError < afBestPixelErrors[uiPixel]) - { - afBestPixelErrors[uiPixel] = fPixelError; - auiBestPixelSelectors[uiPixel] = uiSelector; - afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; - } - } - } - - - // add up all of the pixel errors - float fBlockError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - fBlockError += afBestPixelErrors[uiPixel]; - } - - if (m_fError > fBlockError) - { - m_fError = fBlockError; - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; - m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; - } - } - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 1 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_RGB8A1::TryDegenerates1(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); - TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); - - } - - // ---------------------------------------------------------------------------------------------------- - 
// try version 2 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_RGB8A1::TryDegenerates2(void) - { - - TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); - TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); - TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); - TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 3 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_RGB8A1::TryDegenerates3(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); - TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); - TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); - TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); - - } - - // ---------------------------------------------------------------------------------------------------- - // try version 4 of the degenerate search - // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings - // each subsequent version of the degenerate search uses more basecolor movement and is less likely to - // be successfull - // - void Block4x4Encoding_RGB8A1::TryDegenerates4(void) - { - - TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); - TryDifferential(m_boolMostLikelyFlip, 1, 0, 4); - TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based 
on encoding state - // - void Block4x4Encoding_RGB8A1::SetEncodingBits(void) - { - switch (m_mode) - { - case MODE_ETC1: - SetEncodingBits_ETC1(); - break; - - case MODE_T: - SetEncodingBits_T(); - break; - - case MODE_H: - SetEncodingBits_H(); - break; - - case MODE_PLANAR: - Block4x4Encoding_RGB8::SetEncodingBits_Planar(); - break; - - default: - assert(false); - } - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state if ETC1 mode - // - void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void) - { - - // there is no individual mode in RGB8A1 - assert(m_boolDiff); - - int iRed1 = m_frgbaColor1.IntRed(31.0f); - int iGreen1 = m_frgbaColor1.IntGreen(31.0f); - int iBlue1 = m_frgbaColor1.IntBlue(31.0f); - - int iRed2 = m_frgbaColor2.IntRed(31.0f); - int iGreen2 = m_frgbaColor2.IntGreen(31.0f); - int iBlue2 = m_frgbaColor2.IntBlue(31.0f); - - int iDRed2 = iRed2 - iRed1; - int iDGreen2 = iGreen2 - iGreen1; - int iDBlue2 = iBlue2 - iBlue1; - - assert(iDRed2 >= -4 && iDRed2 < 4); - assert(iDGreen2 >= -4 && iDGreen2 < 4); - assert(iDBlue2 >= -4 && iDBlue2 < 4); - - m_pencodingbitsRGB8->differential.red1 = iRed1; - m_pencodingbitsRGB8->differential.green1 = iGreen1; - m_pencodingbitsRGB8->differential.blue1 = iBlue1; - - m_pencodingbitsRGB8->differential.dred2 = iDRed2; - m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; - m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; - - m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; - m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; - - SetEncodingBits_Selectors(); - - // in RGB8A1 encoding bits, opaque replaces differential - m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; - - m_pencodingbitsRGB8->individual.flip = m_boolFlip; - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state if T mode - // 
- void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void) - { - static const bool SANITY_CHECK = true; - - assert(m_mode == MODE_T); - assert(m_boolDiff == true); - - unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); - unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); - unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); - - unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); - unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); - unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); - - m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2; - m_pencodingbitsRGB8->t.red1b = uiRed1; - m_pencodingbitsRGB8->t.green1 = uiGreen1; - m_pencodingbitsRGB8->t.blue1 = uiBlue1; - - m_pencodingbitsRGB8->t.red2 = uiRed2; - m_pencodingbitsRGB8->t.green2 = uiGreen2; - m_pencodingbitsRGB8->t.blue2 = uiBlue2; - - m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1; - m_pencodingbitsRGB8->t.db = m_uiCW1; - - // in RGB8A1 encoding bits, opaque replaces differential - m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; - - Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); - - // create an invalid R differential to trigger T mode - m_pencodingbitsRGB8->t.detect1 = 0; - m_pencodingbitsRGB8->t.detect2 = 0; - int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - if (iRed2 >= 4) - { - m_pencodingbitsRGB8->t.detect1 = 7; - m_pencodingbitsRGB8->t.detect2 = 0; - } - else - { - m_pencodingbitsRGB8->t.detect1 = 0; - m_pencodingbitsRGB8->t.detect2 = 1; - } - - if (SANITY_CHECK) - { - iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - - // make sure red overflows - assert(iRed2 < 0 || iRed2 > 31); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state if H mode - // - // colors and selectors may need 
to swap in order to generate lsb of distance index - // - void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void) - { - static const bool SANITY_CHECK = true; - - assert(m_mode == MODE_H); - assert(m_boolDiff == true); - - unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); - unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); - unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); - - unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); - unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); - unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); - - unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; - unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; - - bool boolOddDistance = m_uiCW1 & 1; - bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance; - - if (boolSwapColors) - { - m_pencodingbitsRGB8->h.red1 = uiRed2; - m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1; - m_pencodingbitsRGB8->h.green1b = uiGreen2; - m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3; - m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1; - m_pencodingbitsRGB8->h.blue1c = uiBlue2; - - m_pencodingbitsRGB8->h.red2 = uiRed1; - m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1; - m_pencodingbitsRGB8->h.green2b = uiGreen1; - m_pencodingbitsRGB8->h.blue2 = uiBlue1; - - m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; - m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; - } - else - { - m_pencodingbitsRGB8->h.red1 = uiRed1; - m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1; - m_pencodingbitsRGB8->h.green1b = uiGreen1; - m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3; - m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1; - m_pencodingbitsRGB8->h.blue1c = uiBlue1; - - m_pencodingbitsRGB8->h.red2 = uiRed2; - m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1; - m_pencodingbitsRGB8->h.green2b = uiGreen2; - m_pencodingbitsRGB8->h.blue2 = uiBlue2; - - m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; - m_pencodingbitsRGB8->h.db = 
m_uiCW1 >> 1; - } - - // in RGB8A1 encoding bits, opaque replaces differential - m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; - - Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); - - if (boolSwapColors) - { - m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF; - } - - // create an invalid R differential to trigger T mode - m_pencodingbitsRGB8->h.detect1 = 0; - m_pencodingbitsRGB8->h.detect2 = 0; - m_pencodingbitsRGB8->h.detect3 = 0; - int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - if (iRed2 < 0 || iRed2 > 31) - { - m_pencodingbitsRGB8->h.detect1 = 1; - } - if (iGreen2 >= 4) - { - m_pencodingbitsRGB8->h.detect2 = 7; - m_pencodingbitsRGB8->h.detect3 = 0; - } - else - { - m_pencodingbitsRGB8->h.detect2 = 0; - m_pencodingbitsRGB8->h.detect3 = 1; - } - - if (SANITY_CHECK) - { - iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; - iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; - - // make sure red doesn't overflow and green does - assert(iRed2 >= 0 && iRed2 <= 31); - assert(iGreen2 < 0 || iGreen2 > 31); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort - // - void Block4x4Encoding_RGB8A1::PerformIterationOpaque(float a_fEffort) - { - assert(!m_boolPunchThroughPixels); - assert(!m_boolTransparent); - assert(!m_boolDone); - - switch (m_uiEncodingIterations) - { - case 0: - PerformFirstIterationOpaque(); - break; - - case 1: - 
Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); - break; - - case 2: - Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); - break; - - case 3: - Block4x4Encoding_RGB8::TryPlanar(1); - break; - - case 4: - Block4x4Encoding_RGB8::TryTAndH(1); - if (a_fEffort <= 49.5f) - { - m_boolDone = true; - } - break; - - case 5: - Block4x4Encoding_ETC1::TryDegenerates1(); - if (a_fEffort <= 59.5f) - { - m_boolDone = true; - } - break; - - case 6: - Block4x4Encoding_ETC1::TryDegenerates2(); - if (a_fEffort <= 69.5f) - { - m_boolDone = true; - } - break; - - case 7: - Block4x4Encoding_ETC1::TryDegenerates3(); - if (a_fEffort <= 79.5f) - { - m_boolDone = true; - } - break; - - case 8: - Block4x4Encoding_ETC1::TryDegenerates4(); - m_boolDone = true; - break; - - default: - assert(0); - break; - } - - m_uiEncodingIterations++; - SetDoneIfPerfect(); - } - - // ---------------------------------------------------------------------------------------------------- - // find best initial encoding to ensure block has a valid encoding - // - void Block4x4Encoding_RGB8A1::PerformFirstIterationOpaque(void) - { - - // set decoded alphas - // calculate alpha error - m_fError = 0.0f; - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - // m_afDecodedAlphas[uiPixel] = 1.0f; - - float fDeltaA = 1.0f - m_pafrgbaSource[uiPixel].fA; - m_fError += fDeltaA * fDeltaA; - } - - CalculateMostLikelyFlip(); - - m_fError = FLT_MAX; - - Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - Block4x4Encoding_RGB8::TryPlanar(0); - SetDoneIfPerfect(); - if (m_boolDone) - { - return; - } - Block4x4Encoding_RGB8::TryTAndH(0); - SetDoneIfPerfect(); - } - - // 
---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort - // - void Block4x4Encoding_RGB8A1::PerformIterationTransparent(float ) - { - assert(!m_boolOpaque); - assert(m_boolTransparent); - assert(!m_boolDone); - assert(m_uiEncodingIterations == 0); - - m_mode = MODE_ETC1; - m_boolDiff = true; - m_boolFlip = false; - - m_uiCW1 = 0; - m_uiCW2 = 0; - - m_frgbaColor1 = ColorFloatRGBA(); - m_frgbaColor2 = ColorFloatRGBA(); - - for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR; - - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); - //m_afDecodedAlphas[uiPixel] = 0.0f; - } - - CalcBlockError(); - - m_boolDone = true; - m_uiEncodingIterations++; - - } - - // ---------------------------------------------------------------------------------------------------- - // -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGB8A1.cpp contains: + Block4x4Encoding_RGB8A1 + Block4x4Encoding_RGB8A1_Opaque + Block4x4Encoding_RGB8A1_Transparent + +These encoders are used when targetting file format RGB8A1. 
+ +Block4x4Encoding_RGB8A1_Opaque is used when all pixels in the 4x4 block are opaque +Block4x4Encoding_RGB8A1_Transparent is used when all pixels in the 4x4 block are transparent +Block4x4Encoding_RGB8A1 is used when there is a mixture of alphas in the 4x4 block + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGB8A1.h" + +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4Encoding_RGB8.h" + +#include +#include +#include + +namespace Etc +{ + + // #################################################################################################### + // Block4x4Encoding_RGB8A1 + // #################################################################################################### + + float Block4x4Encoding_RGB8A1::s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS] = + { + { 0.0f / 255.0f, 8.0f / 255.0f, 0.0f / 255.0f, -8.0f / 255.0f }, + { 0.0f / 255.0f, 17.0f / 255.0f, 0.0f / 255.0f, -17.0f / 255.0f }, + { 0.0f / 255.0f, 29.0f / 255.0f, 0.0f / 255.0f, -29.0f / 255.0f }, + { 0.0f / 255.0f, 42.0f / 255.0f, 0.0f / 255.0f, -42.0f / 255.0f }, + { 0.0f / 255.0f, 60.0f / 255.0f, 0.0f / 255.0f, -60.0f / 255.0f }, + { 0.0f / 255.0f, 80.0f / 255.0f, 0.0f / 255.0f, -80.0f / 255.0f }, + { 0.0f / 255.0f, 106.0f / 255.0f, 0.0f / 255.0f, -106.0f / 255.0f }, + { 0.0f / 255.0f, 183.0f / 255.0f, 0.0f / 255.0f, -183.0f / 255.0f } + }; + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGB8A1::Block4x4Encoding_RGB8A1(void) + { + m_pencodingbitsRGB8 = nullptr; + m_boolOpaque = false; + m_boolTransparent = false; + m_boolPunchThroughPixels = true; + + } + Block4x4Encoding_RGB8A1::~Block4x4Encoding_RGB8A1(void) {} + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the 
best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_RGB8A1::Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric) + { + + Block4x4Encoding_RGB8::Encode(a_pblockParent, + a_pafrgbaSource, + a_paucEncodingBits, + a_errormetric); + + m_boolOpaque = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE; + m_boolTransparent = a_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT; + m_boolPunchThroughPixels = a_pblockParent->HasPunchThroughPixels(); + +// for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) +// { +// if (m_pafrgbaSource[uiPixel].fA >= 0.5f) +// { +// m_afDecodedAlphas[uiPixel] = 1.0f; +// } +// else +// { +// m_afDecodedAlphas[uiPixel] = 0.0f; +// } +// } + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RGB8A1::Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) + { + + + InitFromEncodingBits_ETC1(a_pblockParent, + a_paucEncodingBits, + a_pafrgbaSource, + a_errormetric, iterationCount); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + // detect if there is a T, H or Planar mode present + int iRed1 = m_pencodingbitsRGB8->differential.red1; + int iDRed2 = m_pencodingbitsRGB8->differential.dred2; + int iRed2 = iRed1 + iDRed2; + + int 
iGreen1 = m_pencodingbitsRGB8->differential.green1; + int iDGreen2 = m_pencodingbitsRGB8->differential.dgreen2; + int iGreen2 = iGreen1 + iDGreen2; + + int iBlue1 = m_pencodingbitsRGB8->differential.blue1; + int iDBlue2 = m_pencodingbitsRGB8->differential.dblue2; + int iBlue2 = iBlue1 + iDBlue2; + + if (iRed2 < 0 || iRed2 > 31) + { + InitFromEncodingBits_T(); + } + else if (iGreen2 < 0 || iGreen2 > 31) + { + InitFromEncodingBits_H(); + } + else if (iBlue2 < 0 || iBlue2 > 31) + { + Block4x4Encoding_RGB8::InitFromEncodingBits_Planar(); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding assuming the encoding is an ETC1 mode. + // if it isn't an ETC1 mode, this will be overwritten later + // + void Block4x4Encoding_RGB8A1::InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, uint16_t iterationCount) + { + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, + a_errormetric, iterationCount); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)a_paucEncodingBits; + + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = m_pencodingbitsRGB8->differential.flip; + m_boolOpaque = m_pencodingbitsRGB8->differential.diff; + + int iR2 = m_pencodingbitsRGB8->differential.red1 + m_pencodingbitsRGB8->differential.dred2; + int iG2 = m_pencodingbitsRGB8->differential.green1 + m_pencodingbitsRGB8->differential.dgreen2; + int iB2 = m_pencodingbitsRGB8->differential.blue1 + m_pencodingbitsRGB8->differential.dblue2; + + if (iR2 < 0) + { + iR2 = 0; + } + else if (iR2 > 31) + { + iR2 = 31; + } + + if (iG2 < 0) + { + iG2 = 0; + } + else if (iG2 > 31) + { + iG2 = 31; + } + + if (iB2 < 0) + { + iB2 = 0; + } + else if (iB2 > 31) + { + iB2 = 31; + } + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5(m_pencodingbitsRGB8->differential.red1, 
m_pencodingbitsRGB8->differential.green1, m_pencodingbitsRGB8->differential.blue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iR2, (unsigned char)iG2, (unsigned char)iB2); + + m_uiCW1 = m_pencodingbitsRGB8->differential.cw1; + m_uiCW2 = m_pencodingbitsRGB8->differential.cw2; + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + Decode_ETC1(); + + CalcBlockError(); + + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if T mode is detected + // + void Block4x4Encoding_RGB8A1::InitFromEncodingBits_T(void) + { + m_mode = MODE_T; + + unsigned char ucRed1 = (unsigned char)((m_pencodingbitsRGB8->t.red1a << 2) + + m_pencodingbitsRGB8->t.red1b); + unsigned char ucGreen1 = m_pencodingbitsRGB8->t.green1; + unsigned char ucBlue1 = m_pencodingbitsRGB8->t.blue1; + + unsigned char ucRed2 = m_pencodingbitsRGB8->t.red2; + unsigned char ucGreen2 = m_pencodingbitsRGB8->t.green2; + unsigned char ucBlue2 = m_pencodingbitsRGB8->t.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->t.da << 1) + m_pencodingbitsRGB8->t.db; + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_T(); + + CalcBlockError(); + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding if H mode is detected + // + void Block4x4Encoding_RGB8A1::InitFromEncodingBits_H(void) + { + m_mode = MODE_H; + + unsigned char ucRed1 = m_pencodingbitsRGB8->h.red1; + unsigned char ucGreen1 = (unsigned char)((m_pencodingbitsRGB8->h.green1a << 1) + + m_pencodingbitsRGB8->h.green1b); + unsigned char ucBlue1 = (unsigned char)((m_pencodingbitsRGB8->h.blue1a << 3) + + 
(m_pencodingbitsRGB8->h.blue1b << 1) + + m_pencodingbitsRGB8->h.blue1c); + + unsigned char ucRed2 = m_pencodingbitsRGB8->h.red2; + unsigned char ucGreen2 = (unsigned char)((m_pencodingbitsRGB8->h.green2a << 1) + + m_pencodingbitsRGB8->h.green2b); + unsigned char ucBlue2 = m_pencodingbitsRGB8->h.blue2; + + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4(ucRed1, ucGreen1, ucBlue1); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4(ucRed2, ucGreen2, ucBlue2); + + // used to determine the LSB of the CW + unsigned int uiRGB1 = (unsigned int)(((int)ucRed1 << 16) + ((int)ucGreen1 << 8) + (int)ucBlue1); + unsigned int uiRGB2 = (unsigned int)(((int)ucRed2 << 16) + ((int)ucGreen2 << 8) + (int)ucBlue2); + + m_uiCW1 = (m_pencodingbitsRGB8->h.da << 2) + (m_pencodingbitsRGB8->h.db << 1); + if (uiRGB1 >= uiRGB2) + { + m_uiCW1++; + } + + Block4x4Encoding_ETC1::InitFromEncodingBits_Selectors(); + + DecodePixels_H(); + + CalcBlockError(); + } + + // ---------------------------------------------------------------------------------------------------- + // for ETC1 modes, set the decoded colors and decoded alpha based on the encoding state + // + void Block4x4Encoding_RGB8A1::Decode_ETC1(void) + { + + const unsigned int *pauiPixelOrder = m_boolFlip ? s_auiPixelOrderFlip1 : s_auiPixelOrderFlip0; + + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS; uiPixelOrder++) + { + ColorFloatRGBA *pfrgbaCenter = uiPixelOrder < 8 ? &m_frgbaColor1 : &m_frgbaColor2; + unsigned int uiCW = uiPixelOrder < 8 ? 
m_uiCW1 : m_uiCW2; + + unsigned int uiPixel = pauiPixelOrder[uiPixelOrder]; + + float fDelta; + if (m_boolOpaque) + fDelta = Block4x4Encoding_ETC1::s_aafCwTable[uiCW][m_auiSelectors[uiPixel]]; + else + fDelta = s_aafCwOpaqueUnsetTable[uiCW][m_auiSelectors[uiPixel]]; + + if (m_boolOpaque == false && m_auiSelectors[uiPixel] == TRANSPARENT_SELECTOR) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel] = 0.0f; + } + else + { + m_afrgbaDecodedColors[uiPixel] = (*pfrgbaCenter + fDelta).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + } + + // TODO: this isn't setting alpha + } + + } + + // ---------------------------------------------------------------------------------------------------- + // for T mode, set the decoded colors and decoded alpha based on the encoding state + // + void Block4x4Encoding_RGB8A1::DecodePixels_T(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + switch (m_auiSelectors[uiPixel]) + { + case 0: + m_afrgbaDecodedColors[uiPixel] = m_frgbaColor1; + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + + case 1: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + + case 2: + if (m_boolOpaque == false) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel] = 0.0f; + } + else + { + m_afrgbaDecodedColors[uiPixel] = m_frgbaColor2; + //m_afDecodedAlphas[uiPixel] = 1.0f; + } + break; + + case 3: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // for H mode, set the decoded colors and decoded alpha based on the encoding state + // + void 
Block4x4Encoding_RGB8A1::DecodePixels_H(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + ColorFloatRGBA frgbaDistance(fDistance, fDistance, fDistance, 0.0f); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + switch (m_auiSelectors[uiPixel]) + { + case 0: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 + frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + + case 1: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor1 - frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + + case 2: + if (m_boolOpaque == false) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel] = 0.0f; + } + else + { + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 + frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + } + break; + + case 3: + m_afrgbaDecodedColors[uiPixel] = (m_frgbaColor2 - frgbaDistance).ClampRGB(); + //m_afDecodedAlphas[uiPixel] = 1.0f; + break; + } + + } + + } + + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + // RGB8A1 can't use individual mode + // RGB8A1 with transparent pixels can't use planar mode + // + void Block4x4Encoding_RGB8A1::PerformIteration(float a_fEffort) + { + if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::OPAQUE) + { + PerformIterationOpaque(a_fEffort); + return; + } + else if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT) + { + PerformIterationTransparent(a_fEffort); + return; + } + + assert(!m_boolOpaque); + assert(!m_boolTransparent); + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + PerformFirstIteration(); + break; + + case 1: + 
TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + if (a_fEffort <= 39.5f) + { + m_boolDone = true; + } + break; + + case 3: + Block4x4Encoding_RGB8::CalculateBaseColorsForTAndH(); + TryT(1); + TryH(1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 4: + TryDegenerates1(); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 5: + TryDegenerates2(); + if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 6: + TryDegenerates3(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 7: + TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + + SetDoneIfPerfect(); + + } + + // ---------------------------------------------------------------------------------------------------- + // find best initial encoding to ensure block has a valid encoding + // + void Block4x4Encoding_RGB8A1::PerformFirstIteration(void) + { + Block4x4Encoding_ETC1::CalculateMostLikelyFlip(); + + m_fError = FLT_MAX; + + TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + + } + + // ---------------------------------------------------------------------------------------------------- + // mostly copied from ETC1 + // differences: + // Block4x4Encoding_RGB8A1 encodingTry = *this; + // + void Block4x4Encoding_RGB8A1::TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2) + { + + ColorFloatRGBA frgbaColor1; + ColorFloatRGBA frgbaColor2; + + const unsigned int *pauiPixelMapping1; + const unsigned int *pauiPixelMapping2; + + if (a_boolFlip) + { + frgbaColor1 = m_frgbaSourceAverageTop; + frgbaColor2 = m_frgbaSourceAverageBottom; + + pauiPixelMapping1 = s_auiTopPixelMapping; + pauiPixelMapping2 = s_auiBottomPixelMapping; + } 
+ else + { + frgbaColor1 = m_frgbaSourceAverageLeft; + frgbaColor2 = m_frgbaSourceAverageRight; + + pauiPixelMapping1 = s_auiLeftPixelMapping; + pauiPixelMapping2 = s_auiRightPixelMapping; + } + + DifferentialTrys trys(frgbaColor1, frgbaColor2, pauiPixelMapping1, pauiPixelMapping2, + a_uiRadius, a_iGrayOffset1, a_iGrayOffset2); + + Block4x4Encoding_RGB8A1 encodingTry = *this; + encodingTry.m_boolFlip = a_boolFlip; + + encodingTry.TryDifferentialHalf(&trys.m_half1); + encodingTry.TryDifferentialHalf(&trys.m_half2); + + // find best halves that are within differential range + DifferentialTrys::Try *ptryBest1 = nullptr; + DifferentialTrys::Try *ptryBest2 = nullptr; + encodingTry.m_fError = FLT_MAX; + + // see if the best of each half are in differential range + int iDRed = trys.m_half2.m_ptryBest->m_iRed - trys.m_half1.m_ptryBest->m_iRed; + int iDGreen = trys.m_half2.m_ptryBest->m_iGreen - trys.m_half1.m_ptryBest->m_iGreen; + int iDBlue = trys.m_half2.m_ptryBest->m_iBlue - trys.m_half1.m_ptryBest->m_iBlue; + if (iDRed >= -4 && iDRed <= 3 && iDGreen >= -4 && iDGreen <= 3 && iDBlue >= -4 && iDBlue <= 3) + { + ptryBest1 = trys.m_half1.m_ptryBest; + ptryBest2 = trys.m_half2.m_ptryBest; + encodingTry.m_fError = trys.m_half1.m_ptryBest->m_fError + trys.m_half2.m_ptryBest->m_fError; + } + else + { + // else, find the next best halves that are in differential range + for (DifferentialTrys::Try *ptry1 = &trys.m_half1.m_atry[0]; + ptry1 < &trys.m_half1.m_atry[trys.m_half1.m_uiTrys]; + ptry1++) + { + for (DifferentialTrys::Try *ptry2 = &trys.m_half2.m_atry[0]; + ptry2 < &trys.m_half2.m_atry[trys.m_half2.m_uiTrys]; + ptry2++) + { + iDRed = ptry2->m_iRed - ptry1->m_iRed; + bool boolValidRedDelta = iDRed <= 3 && iDRed >= -4; + iDGreen = ptry2->m_iGreen - ptry1->m_iGreen; + bool boolValidGreenDelta = iDGreen <= 3 && iDGreen >= -4; + iDBlue = ptry2->m_iBlue - ptry1->m_iBlue; + bool boolValidBlueDelta = iDBlue <= 3 && iDBlue >= -4; + + if (boolValidRedDelta && boolValidGreenDelta && 
boolValidBlueDelta) + { + float fError = ptry1->m_fError + ptry2->m_fError; + + if (fError < encodingTry.m_fError) + { + encodingTry.m_fError = fError; + + ptryBest1 = ptry1; + ptryBest2 = ptry2; + } + } + + } + } + assert(encodingTry.m_fError < FLT_MAX); + assert(ptryBest1 != nullptr); + assert(ptryBest2 != nullptr); + } + + if (encodingTry.m_fError < m_fError) + { + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = encodingTry.m_boolFlip; + m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest1->m_iRed, (unsigned char)ptryBest1->m_iGreen, (unsigned char)ptryBest1->m_iBlue); + m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB5((unsigned char)ptryBest2->m_iRed, (unsigned char)ptryBest2->m_iGreen, (unsigned char)ptryBest2->m_iBlue); + m_uiCW1 = ptryBest1->m_uiCW; + m_uiCW2 = ptryBest2->m_uiCW; + + m_fError = 0.0f; + for (unsigned int uiPixelOrder = 0; uiPixelOrder < PIXELS / 2; uiPixelOrder++) + { + unsigned int uiPixel1 = pauiPixelMapping1[uiPixelOrder]; + unsigned int uiPixel2 = pauiPixelMapping2[uiPixelOrder]; + + unsigned int uiSelector1 = ptryBest1->m_auiSelectors[uiPixelOrder]; + unsigned int uiSelector2 = ptryBest2->m_auiSelectors[uiPixelOrder]; + + m_auiSelectors[uiPixel1] = uiSelector1; + m_auiSelectors[uiPixel2] = ptryBest2->m_auiSelectors[uiPixelOrder]; + + float alpha1 = 1.0; + float alpha2 = 1.0; + + if (uiSelector1 == TRANSPARENT_SELECTOR) + { + m_afrgbaDecodedColors[uiPixel1] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel1] = 0.0f; + alpha1 = 0.0; + } + else + { + float fDeltaRGB1 = s_aafCwOpaqueUnsetTable[m_uiCW1][uiSelector1]; + m_afrgbaDecodedColors[uiPixel1] = (m_frgbaColor1 + fDeltaRGB1).ClampRGB(); + //m_afDecodedAlphas[uiPixel1] = 1.0f; + } + + if (uiSelector2 == TRANSPARENT_SELECTOR) + { + m_afrgbaDecodedColors[uiPixel2] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel2] = 0.0f; + alpha2 = 0.0; + } + else + { + float fDeltaRGB2 = s_aafCwOpaqueUnsetTable[m_uiCW2][uiSelector2]; + m_afrgbaDecodedColors[uiPixel2] = 
(m_frgbaColor2 + fDeltaRGB2).ClampRGB(); + //m_afDecodedAlphas[uiPixel2] = 1.0f; + } + + float fDeltaA1 =alpha1 - m_pafrgbaSource[uiPixel1].fA; + m_fError += fDeltaA1 * fDeltaA1; + float fDeltaA2 = alpha2 - m_pafrgbaSource[uiPixel2].fA; + m_fError += fDeltaA2 * fDeltaA2; + } + + m_fError1 = ptryBest1->m_fError; + m_fError2 = ptryBest2->m_fError; + m_boolSeverelyBentDifferentialColors = trys.m_boolSeverelyBentColors; + m_fError = m_fError1 + m_fError2; + + // sanity check + { + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + iDRed = iRed2 - iRed1; + iDGreen = iGreen2 - iGreen1; + iDBlue = iBlue2 - iBlue1; + + assert(iDRed >= -4 && iDRed < 4); + assert(iDGreen >= -4 && iDGreen < 4); + assert(iDBlue >= -4 && iDBlue < 4); + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // mostly copied from ETC1 + // differences: + // uses s_aafCwOpaqueUnsetTable + // color for selector set to 0,0,0,0 + // + void Block4x4Encoding_RGB8A1::TryDifferentialHalf(DifferentialTrys::Half *a_phalf) + { + + a_phalf->m_ptryBest = nullptr; + float fBestTryError = FLT_MAX; + + a_phalf->m_uiTrys = 0; + for (int iRed = a_phalf->m_iRed - (int)a_phalf->m_uiRadius; + iRed <= a_phalf->m_iRed + (int)a_phalf->m_uiRadius; + iRed++) + { + assert(iRed >= 0 && iRed <= 31); + + for (int iGreen = a_phalf->m_iGreen - (int)a_phalf->m_uiRadius; + iGreen <= a_phalf->m_iGreen + (int)a_phalf->m_uiRadius; + iGreen++) + { + assert(iGreen >= 0 && iGreen <= 31); + + for (int iBlue = a_phalf->m_iBlue - (int)a_phalf->m_uiRadius; + iBlue <= a_phalf->m_iBlue + (int)a_phalf->m_uiRadius; + iBlue++) + { + assert(iBlue >= 0 && iBlue <= 31); + + DifferentialTrys::Try *ptry = &a_phalf->m_atry[a_phalf->m_uiTrys]; + assert(ptry < 
&a_phalf->m_atry[DifferentialTrys::Half::MAX_TRYS]); + + ptry->m_iRed = iRed; + ptry->m_iGreen = iGreen; + ptry->m_iBlue = iBlue; + ptry->m_fError = FLT_MAX; + ColorFloatRGBA frgbaColor = ColorFloatRGBA::ConvertFromRGB5((unsigned char)iRed, (unsigned char)iGreen, (unsigned char)iBlue); + + // try each CW + for (unsigned int uiCW = 0; uiCW < CW_RANGES; uiCW++) + { + unsigned int auiPixelSelectors[PIXELS / 2]; + ColorFloatRGBA afrgbaDecodedColors[PIXELS / 2]; + float afPixelErrors[PIXELS / 2] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + + // pre-compute decoded pixels for each selector + ColorFloatRGBA afrgbaSelectors[SELECTORS]; + assert(SELECTORS == 4); + afrgbaSelectors[0] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][0]).ClampRGB(); + afrgbaSelectors[1] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][1]).ClampRGB(); + afrgbaSelectors[2] = ColorFloatRGBA(); + afrgbaSelectors[3] = (frgbaColor + s_aafCwOpaqueUnsetTable[uiCW][3]).ClampRGB(); + + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + int srcPixelIndex = a_phalf->m_pauiPixelMapping[uiPixel]; + const ColorFloatRGBA *pfrgbaSourcePixel = &m_pafrgbaSource[srcPixelIndex]; + ColorFloatRGBA frgbaDecodedPixel; + + for (unsigned int uiSelector = 0; uiSelector < SELECTORS; uiSelector++) + { + if (pfrgbaSourcePixel->fA < 0.5f) + { + uiSelector = TRANSPARENT_SELECTOR; + } + else if (uiSelector == TRANSPARENT_SELECTOR) + { + continue; + } + + frgbaDecodedPixel = afrgbaSelectors[uiSelector]; + + float fPixelError; + + fPixelError = CalcPixelError(frgbaDecodedPixel, srcPixelIndex); + + if (fPixelError < afPixelErrors[uiPixel]) + { + auiPixelSelectors[uiPixel] = uiSelector; + afrgbaDecodedColors[uiPixel] = frgbaDecodedPixel; + afPixelErrors[uiPixel] = fPixelError; + } + + if (uiSelector == TRANSPARENT_SELECTOR) + { + break; + } + } + } + + // add up all pixel errors + float fCWError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + fCWError += 
afPixelErrors[uiPixel]; + } + + // if best CW so far + if (fCWError < ptry->m_fError) + { + ptry->m_uiCW = uiCW; + for (unsigned int uiPixel = 0; uiPixel < 8; uiPixel++) + { + ptry->m_auiSelectors[uiPixel] = auiPixelSelectors[uiPixel]; + } + ptry->m_fError = fCWError; + } + + } + + if (ptry->m_fError < fBestTryError) + { + a_phalf->m_ptryBest = ptry; + fBestTryError = ptry->m_fError; + } + + assert(ptry->m_fError < FLT_MAX); + + a_phalf->m_uiTrys++; + } + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in T mode + // save this encoding if it improves the error + // + // since pixels that use base color1 don't use the distance table, color1 and color2 can be twiddled independently + // better encoding can be found if TWIDDLE_RADIUS is set to 2, but it will be much slower + // + void Block4x4Encoding_RGB8A1::TryT(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8A1 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_T; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int 
iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle m_frgbaOriginalColor2_TAndH + // twiddle color2 first, since it affects 3 selectors, while color1 only affects one selector + // + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + } + else + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor1_TAndH; + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = 
encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + // twiddle m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + for (unsigned int uiBaseColorSwaps = 0; uiBaseColorSwaps < 2; uiBaseColorSwaps++) + { + if (uiBaseColorSwaps == 0) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + } + else + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor2_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + } + + encodingTry.TryT_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryT + // called on an encodingTry + // + void Block4x4Encoding_RGB8A1::TryT_BestSelectorCombination(void) + { + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int 
auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = m_frgbaColor1; + afrgbaDecodedPixel[1] = (m_frgbaColor2 + fDistance).ClampRGB(); + afrgbaDecodedPixel[2] = ColorFloatRGBA(); + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + // try each selector + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiMinSelector = 0; + unsigned int uiMaxSelector = SELECTORS - 1; + + if (m_pafrgbaSource[uiPixel].fA < 0.5f) + { + uiMinSelector = 2; + uiMaxSelector = 2; + } + + for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) + { + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], uiPixel); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (m_fError > fBlockError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try encoding in H mode + // save this encoding if it improves the error + // + // since all pixels use the distance table, color1 and color2 can NOT be twiddled independently + // TWIDDLE_RADIUS of 2 is 
WAY too slow + // + void Block4x4Encoding_RGB8A1::TryH(unsigned int a_uiRadius) + { + Block4x4Encoding_RGB8A1 encodingTry = *this; + + // init "try" + { + encodingTry.m_mode = MODE_H; + encodingTry.m_boolDiff = true; + encodingTry.m_boolFlip = false; + encodingTry.m_fError = FLT_MAX; + } + + int iColor1Red = m_frgbaOriginalColor1_TAndH.IntRed(15.0f); + int iColor1Green = m_frgbaOriginalColor1_TAndH.IntGreen(15.0f); + int iColor1Blue = m_frgbaOriginalColor1_TAndH.IntBlue(15.0f); + + int iMinRed1 = iColor1Red - (int)a_uiRadius; + if (iMinRed1 < 0) + { + iMinRed1 = 0; + } + int iMaxRed1 = iColor1Red + (int)a_uiRadius; + if (iMaxRed1 > 15) + { + iMinRed1 = 15; + } + + int iMinGreen1 = iColor1Green - (int)a_uiRadius; + if (iMinGreen1 < 0) + { + iMinGreen1 = 0; + } + int iMaxGreen1 = iColor1Green + (int)a_uiRadius; + if (iMaxGreen1 > 15) + { + iMinGreen1 = 15; + } + + int iMinBlue1 = iColor1Blue - (int)a_uiRadius; + if (iMinBlue1 < 0) + { + iMinBlue1 = 0; + } + int iMaxBlue1 = iColor1Blue + (int)a_uiRadius; + if (iMaxBlue1 > 15) + { + iMinBlue1 = 15; + } + + int iColor2Red = m_frgbaOriginalColor2_TAndH.IntRed(15.0f); + int iColor2Green = m_frgbaOriginalColor2_TAndH.IntGreen(15.0f); + int iColor2Blue = m_frgbaOriginalColor2_TAndH.IntBlue(15.0f); + + int iMinRed2 = iColor2Red - (int)a_uiRadius; + if (iMinRed2 < 0) + { + iMinRed2 = 0; + } + int iMaxRed2 = iColor2Red + (int)a_uiRadius; + if (iMaxRed2 > 15) + { + iMinRed2 = 15; + } + + int iMinGreen2 = iColor2Green - (int)a_uiRadius; + if (iMinGreen2 < 0) + { + iMinGreen2 = 0; + } + int iMaxGreen2 = iColor2Green + (int)a_uiRadius; + if (iMaxGreen2 > 15) + { + iMinGreen2 = 15; + } + + int iMinBlue2 = iColor2Blue - (int)a_uiRadius; + if (iMinBlue2 < 0) + { + iMinBlue2 = 0; + } + int iMaxBlue2 = iColor2Blue + (int)a_uiRadius; + if (iMaxBlue2 > 15) + { + iMinBlue2 = 15; + } + + for (unsigned int uiDistance = 0; uiDistance < TH_DISTANCES; uiDistance++) + { + encodingTry.m_uiCW1 = uiDistance; + + // twiddle 
m_frgbaOriginalColor1_TAndH + for (int iRed1 = iMinRed1; iRed1 <= iMaxRed1; iRed1++) + { + for (int iGreen1 = iMinGreen1; iGreen1 <= iMaxGreen1; iGreen1++) + { + for (int iBlue1 = iMinBlue1; iBlue1 <= iMaxBlue1; iBlue1++) + { + encodingTry.m_frgbaColor1 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed1, (unsigned char)iGreen1, (unsigned char)iBlue1); + encodingTry.m_frgbaColor2 = m_frgbaOriginalColor2_TAndH; + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed1 == iColor2Red && iGreen1 == iColor2Green && iBlue1 == iColor2Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + + // twiddle m_frgbaOriginalColor2_TAndH + for (int iRed2 = iMinRed2; iRed2 <= iMaxRed2; iRed2++) + { + for (int iGreen2 = iMinGreen2; iGreen2 <= iMaxGreen2; iGreen2++) + { + for (int iBlue2 = iMinBlue2; iBlue2 <= iMaxBlue2; iBlue2++) + { + encodingTry.m_frgbaColor1 = m_frgbaOriginalColor1_TAndH; + encodingTry.m_frgbaColor2 = ColorFloatRGBA::ConvertFromRGB4((unsigned char)iRed2, (unsigned char)iGreen2, (unsigned char)iBlue2); + + // if color1 == color2, H encoding issues can pop up, so abort + if (iRed2 == iColor1Red && iGreen2 == iColor1Green && iBlue2 == iColor1Blue) + { + continue; + } + + encodingTry.TryH_BestSelectorCombination(); + + if (encodingTry.m_fError < m_fError) + { + m_mode = encodingTry.m_mode; + m_boolDiff = encodingTry.m_boolDiff; + m_boolFlip = encodingTry.m_boolFlip; + + m_frgbaColor1 = 
encodingTry.m_frgbaColor1; + m_frgbaColor2 = encodingTry.m_frgbaColor2; + m_uiCW1 = encodingTry.m_uiCW1; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = encodingTry.m_auiSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = encodingTry.m_afrgbaDecodedColors[uiPixel]; + } + + m_fError = encodingTry.m_fError; + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // find best selector combination for TryH + // called on an encodingTry + // + void Block4x4Encoding_RGB8A1::TryH_BestSelectorCombination(void) + { + + // abort if colors and CW will pose an encoding problem + { + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(255.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(255.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(255.0f); + unsigned int uiColorValue1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(255.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(255.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(255.0f); + unsigned int uiColorValue2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; + + unsigned int uiCWLsb = m_uiCW1 & 1; + + if ((uiColorValue1 >= (uiColorValue2 & uiCWLsb)) == 0 || + (uiColorValue1 < (uiColorValue2 & uiCWLsb)) == 1) + { + return; + } + } + + float fDistance = s_afTHDistanceTable[m_uiCW1]; + + unsigned int auiBestPixelSelectors[PIXELS]; + float afBestPixelErrors[PIXELS] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, + FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }; + ColorFloatRGBA afrgbaBestDecodedPixels[PIXELS]; + ColorFloatRGBA afrgbaDecodedPixel[SELECTORS]; + + assert(SELECTORS == 4); + afrgbaDecodedPixel[0] = (m_frgbaColor1 + fDistance).ClampRGB(); + afrgbaDecodedPixel[1] = (m_frgbaColor1 - fDistance).ClampRGB(); 
+ afrgbaDecodedPixel[2] = ColorFloatRGBA();; + afrgbaDecodedPixel[3] = (m_frgbaColor2 - fDistance).ClampRGB(); + + + // try each selector + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + unsigned int uiMinSelector = 0; + unsigned int uiMaxSelector = SELECTORS - 1; + + if (m_pafrgbaSource[uiPixel].fA < 0.5f) + { + uiMinSelector = 2; + uiMaxSelector = 2; + } + + for (unsigned int uiSelector = uiMinSelector; uiSelector <= uiMaxSelector; uiSelector++) + { + float fPixelError = CalcPixelError(afrgbaDecodedPixel[uiSelector], uiPixel); + + if (fPixelError < afBestPixelErrors[uiPixel]) + { + afBestPixelErrors[uiPixel] = fPixelError; + auiBestPixelSelectors[uiPixel] = uiSelector; + afrgbaBestDecodedPixels[uiPixel] = afrgbaDecodedPixel[uiSelector]; + } + } + } + + + // add up all of the pixel errors + float fBlockError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestPixelErrors[uiPixel]; + } + + if (m_fError > fBlockError) + { + m_fError = fBlockError; + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = auiBestPixelSelectors[uiPixel]; + m_afrgbaDecodedColors[uiPixel] = afrgbaBestDecodedPixels[uiPixel]; + } + } + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 1 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates1(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + 
// try version 2 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates2(void) + { + + TryDifferential(!m_boolMostLikelyFlip, 1, -2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 2, 0); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, 2); + TryDifferential(!m_boolMostLikelyFlip, 1, 0, -2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 3 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates3(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, -2, 2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, -2); + TryDifferential(m_boolMostLikelyFlip, 1, 2, 2); + + } + + // ---------------------------------------------------------------------------------------------------- + // try version 4 of the degenerate search + // degenerate encodings use basecolor movement and a subset of the selectors to find useful encodings + // each subsequent version of the degenerate search uses more basecolor movement and is less likely to + // be successfull + // + void Block4x4Encoding_RGB8A1::TryDegenerates4(void) + { + + TryDifferential(m_boolMostLikelyFlip, 1, -4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 4, 0); + TryDifferential(m_boolMostLikelyFlip, 1, 0, 4); + TryDifferential(m_boolMostLikelyFlip, 1, 0, -4); + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based 
on encoding state + // + void Block4x4Encoding_RGB8A1::SetEncodingBits(void) + { + switch (m_mode) + { + case MODE_ETC1: + SetEncodingBits_ETC1(); + break; + + case MODE_T: + SetEncodingBits_T(); + break; + + case MODE_H: + SetEncodingBits_H(); + break; + + case MODE_PLANAR: + Block4x4Encoding_RGB8::SetEncodingBits_Planar(); + break; + + default: + assert(false); + } + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if ETC1 mode + // + void Block4x4Encoding_RGB8A1::SetEncodingBits_ETC1(void) + { + + // there is no individual mode in RGB8A1 + assert(m_boolDiff); + + int iRed1 = m_frgbaColor1.IntRed(31.0f); + int iGreen1 = m_frgbaColor1.IntGreen(31.0f); + int iBlue1 = m_frgbaColor1.IntBlue(31.0f); + + int iRed2 = m_frgbaColor2.IntRed(31.0f); + int iGreen2 = m_frgbaColor2.IntGreen(31.0f); + int iBlue2 = m_frgbaColor2.IntBlue(31.0f); + + int iDRed2 = iRed2 - iRed1; + int iDGreen2 = iGreen2 - iGreen1; + int iDBlue2 = iBlue2 - iBlue1; + + assert(iDRed2 >= -4 && iDRed2 < 4); + assert(iDGreen2 >= -4 && iDGreen2 < 4); + assert(iDBlue2 >= -4 && iDBlue2 < 4); + + m_pencodingbitsRGB8->differential.red1 = iRed1; + m_pencodingbitsRGB8->differential.green1 = iGreen1; + m_pencodingbitsRGB8->differential.blue1 = iBlue1; + + m_pencodingbitsRGB8->differential.dred2 = iDRed2; + m_pencodingbitsRGB8->differential.dgreen2 = iDGreen2; + m_pencodingbitsRGB8->differential.dblue2 = iDBlue2; + + m_pencodingbitsRGB8->individual.cw1 = m_uiCW1; + m_pencodingbitsRGB8->individual.cw2 = m_uiCW2; + + SetEncodingBits_Selectors(); + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + m_pencodingbitsRGB8->individual.flip = m_boolFlip; + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if T mode + // 
+ void Block4x4Encoding_RGB8A1::SetEncodingBits_T(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_T); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + m_pencodingbitsRGB8->t.red1a = uiRed1 >> 2; + m_pencodingbitsRGB8->t.red1b = uiRed1; + m_pencodingbitsRGB8->t.green1 = uiGreen1; + m_pencodingbitsRGB8->t.blue1 = uiBlue1; + + m_pencodingbitsRGB8->t.red2 = uiRed2; + m_pencodingbitsRGB8->t.green2 = uiGreen2; + m_pencodingbitsRGB8->t.blue2 = uiBlue2; + + m_pencodingbitsRGB8->t.da = m_uiCW1 >> 1; + m_pencodingbitsRGB8->t.db = m_uiCW1; + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + if (iRed2 >= 4) + { + m_pencodingbitsRGB8->t.detect1 = 7; + m_pencodingbitsRGB8->t.detect2 = 0; + } + else + { + m_pencodingbitsRGB8->t.detect1 = 0; + m_pencodingbitsRGB8->t.detect2 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + + // make sure red overflows + assert(iRed2 < 0 || iRed2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state if H mode + // + // colors and selectors may need 
to swap in order to generate lsb of distance index + // + void Block4x4Encoding_RGB8A1::SetEncodingBits_H(void) + { + static const bool SANITY_CHECK = true; + + assert(m_mode == MODE_H); + assert(m_boolDiff == true); + + unsigned int uiRed1 = (unsigned int)m_frgbaColor1.IntRed(15.0f); + unsigned int uiGreen1 = (unsigned int)m_frgbaColor1.IntGreen(15.0f); + unsigned int uiBlue1 = (unsigned int)m_frgbaColor1.IntBlue(15.0f); + + unsigned int uiRed2 = (unsigned int)m_frgbaColor2.IntRed(15.0f); + unsigned int uiGreen2 = (unsigned int)m_frgbaColor2.IntGreen(15.0f); + unsigned int uiBlue2 = (unsigned int)m_frgbaColor2.IntBlue(15.0f); + + unsigned int uiColor1 = (uiRed1 << 16) + (uiGreen1 << 8) + uiBlue1; + unsigned int uiColor2 = (uiRed2 << 16) + (uiGreen2 << 8) + uiBlue2; + + bool boolOddDistance = m_uiCW1 & 1; + bool boolSwapColors = (uiColor1 < uiColor2) ^ !boolOddDistance; + + if (boolSwapColors) + { + m_pencodingbitsRGB8->h.red1 = uiRed2; + m_pencodingbitsRGB8->h.green1a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen2; + m_pencodingbitsRGB8->h.blue1a = uiBlue2 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue2 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue2; + + m_pencodingbitsRGB8->h.red2 = uiRed1; + m_pencodingbitsRGB8->h.green2a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen1; + m_pencodingbitsRGB8->h.blue2 = uiBlue1; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = m_uiCW1 >> 1; + } + else + { + m_pencodingbitsRGB8->h.red1 = uiRed1; + m_pencodingbitsRGB8->h.green1a = uiGreen1 >> 1; + m_pencodingbitsRGB8->h.green1b = uiGreen1; + m_pencodingbitsRGB8->h.blue1a = uiBlue1 >> 3; + m_pencodingbitsRGB8->h.blue1b = uiBlue1 >> 1; + m_pencodingbitsRGB8->h.blue1c = uiBlue1; + + m_pencodingbitsRGB8->h.red2 = uiRed2; + m_pencodingbitsRGB8->h.green2a = uiGreen2 >> 1; + m_pencodingbitsRGB8->h.green2b = uiGreen2; + m_pencodingbitsRGB8->h.blue2 = uiBlue2; + + m_pencodingbitsRGB8->h.da = m_uiCW1 >> 2; + m_pencodingbitsRGB8->h.db = 
m_uiCW1 >> 1; + } + + // in RGB8A1 encoding bits, opaque replaces differential + m_pencodingbitsRGB8->differential.diff = !m_boolPunchThroughPixels; + + Block4x4Encoding_ETC1::SetEncodingBits_Selectors(); + + if (boolSwapColors) + { + m_pencodingbitsRGB8->h.selectors ^= 0x0000FFFF; + } + + // create an invalid R differential to trigger T mode + m_pencodingbitsRGB8->h.detect1 = 0; + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 0; + int iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + int iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + if (iRed2 < 0 || iRed2 > 31) + { + m_pencodingbitsRGB8->h.detect1 = 1; + } + if (iGreen2 >= 4) + { + m_pencodingbitsRGB8->h.detect2 = 7; + m_pencodingbitsRGB8->h.detect3 = 0; + } + else + { + m_pencodingbitsRGB8->h.detect2 = 0; + m_pencodingbitsRGB8->h.detect3 = 1; + } + + if (SANITY_CHECK) + { + iRed2 = (int)m_pencodingbitsRGB8->differential.red1 + (int)m_pencodingbitsRGB8->differential.dred2; + iGreen2 = (int)m_pencodingbitsRGB8->differential.green1 + (int)m_pencodingbitsRGB8->differential.dgreen2; + + // make sure red doesn't overflow and green does + assert(iRed2 >= 0 && iRed2 <= 31); + assert(iGreen2 < 0 || iGreen2 > 31); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8A1::PerformIterationOpaque(float a_fEffort) + { + assert(!m_boolPunchThroughPixels); + assert(!m_boolTransparent); + assert(!m_boolDone); + + switch (m_uiEncodingIterations) + { + case 0: + PerformFirstIterationOpaque(); + break; + + case 1: + 
Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 2: + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 1, 0, 0); + break; + + case 3: + Block4x4Encoding_RGB8::TryPlanar(1); + break; + + case 4: + Block4x4Encoding_RGB8::TryTAndH(1); + if (a_fEffort <= 49.5f) + { + m_boolDone = true; + } + break; + + case 5: + Block4x4Encoding_ETC1::TryDegenerates1(); + if (a_fEffort <= 59.5f) + { + m_boolDone = true; + } + break; + + case 6: + Block4x4Encoding_ETC1::TryDegenerates2(); + if (a_fEffort <= 69.5f) + { + m_boolDone = true; + } + break; + + case 7: + Block4x4Encoding_ETC1::TryDegenerates3(); + if (a_fEffort <= 79.5f) + { + m_boolDone = true; + } + break; + + case 8: + Block4x4Encoding_ETC1::TryDegenerates4(); + m_boolDone = true; + break; + + default: + assert(0); + break; + } + + m_uiEncodingIterations++; + SetDoneIfPerfect(); + } + + // ---------------------------------------------------------------------------------------------------- + // find best initial encoding to ensure block has a valid encoding + // + void Block4x4Encoding_RGB8A1::PerformFirstIterationOpaque(void) + { + + // set decoded alphas + // calculate alpha error + m_fError = 0.0f; + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // m_afDecodedAlphas[uiPixel] = 1.0f; + + float fDeltaA = 1.0f - m_pafrgbaSource[uiPixel].fA; + m_fError += fDeltaA * fDeltaA; + } + + CalculateMostLikelyFlip(); + + m_fError = FLT_MAX; + + Block4x4Encoding_ETC1::TryDifferential(m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_ETC1::TryDifferential(!m_boolMostLikelyFlip, 0, 0, 0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_RGB8::TryPlanar(0); + SetDoneIfPerfect(); + if (m_boolDone) + { + return; + } + Block4x4Encoding_RGB8::TryTAndH(0); + SetDoneIfPerfect(); + } + + // 
---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + void Block4x4Encoding_RGB8A1::PerformIterationTransparent(float ) + { + assert(!m_boolOpaque); + assert(m_boolTransparent); + assert(!m_boolDone); + assert(m_uiEncodingIterations == 0); + + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = false; + + m_uiCW1 = 0; + m_uiCW2 = 0; + + m_frgbaColor1 = ColorFloatRGBA(); + m_frgbaColor2 = ColorFloatRGBA(); + + for (unsigned int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiSelectors[uiPixel] = TRANSPARENT_SELECTOR; + + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); + //m_afDecodedAlphas[uiPixel] = 0.0f; + } + + CalcBlockError(); + + m_boolDone = true; + m_uiEncodingIterations++; + + } + + // ---------------------------------------------------------------------------------------------------- + // +} diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.h b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.h index f6b31bad..05e57417 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGB8A1.h @@ -1,140 +1,140 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "EtcBlock4x4Encoding_RGB8.h" -#include "EtcErrorMetric.h" -#include "EtcBlock4x4EncodingBits.h" - -namespace Etc -{ - - // ################################################################################ - // Block4x4Encoding_RGB8A1 - // RGB8A1 if not completely opaque or transparent - // ################################################################################ - - class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8 - { - public: - - static const unsigned int TRANSPARENT_SELECTOR = 2; - - Block4x4Encoding_RGB8A1(void); - virtual ~Block4x4Encoding_RGB8A1(void); - - virtual void Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, - ErrorMetric a_errormetric) override; - - virtual void Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) override; - - virtual void PerformIteration(float a_fEffort) override; - - virtual void SetEncodingBits(void) override; - - void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, uint16_t iterationCount); - - void InitFromEncodingBits_T(void); - void InitFromEncodingBits_H(void); - - void PerformFirstIteration(void); - - void Decode_ETC1(void); - void DecodePixels_T(void); - void DecodePixels_H(void); - void SetEncodingBits_ETC1(void); - void SetEncodingBits_T(void); - void SetEncodingBits_H(void); - - private: - - bool m_boolOpaque; // all source pixels have alpha >= 0.5 - bool m_boolTransparent; // all source pixels have alpha < 0.5 - bool m_boolPunchThroughPixels; // some source pixels have alpha < 0.5 - - // pulled from ETC1 - static const unsigned int CW_BITS = 3; - static const unsigned int 
CW_RANGES = 1 << CW_BITS; - - static const unsigned int SELECTOR_BITS = 2; - static const unsigned int SELECTORS = 1 << SELECTOR_BITS; - - static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS]; - - private: - - void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2); - void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); - - void TryT(unsigned int a_uiRadius); - void TryT_BestSelectorCombination(void); - void TryH(unsigned int a_uiRadius); - void TryH_BestSelectorCombination(void); - - void TryDegenerates1(void); - void TryDegenerates2(void); - void TryDegenerates3(void); - void TryDegenerates4(void); - - void PerformIterationOpaque(float a_fEffort); - void PerformFirstIterationOpaque(void); - void PerformIterationTransparent(float); - }; - -// // ################################################################################ -// // Block4x4Encoding_RGB8A1_Opaque -// // RGB8A1 if all pixels have alpha==1 -// // ################################################################################ -// -// class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1 -// { -// public: -// -// virtual void PerformIteration(float a_fEffort); -// -// void PerformFirstIteration(void); -// -// private: -// -// }; -// -// // ################################################################################ -// // Block4x4Encoding_RGB8A1_Transparent -// // RGB8A1 if all pixels have alpha==0 -// // ################################################################################ -// -// class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1 -// { -// public: -// -// virtual void PerformIteration(float a_fEffort); -// -// private: -// -// }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" +#include "EtcErrorMetric.h" +#include "EtcBlock4x4EncodingBits.h" + +namespace Etc +{ + + // ################################################################################ + // Block4x4Encoding_RGB8A1 + // RGB8A1 if not completely opaque or transparent + // ################################################################################ + + class Block4x4Encoding_RGB8A1 : public Block4x4Encoding_RGB8 + { + public: + + static const unsigned int TRANSPARENT_SELECTOR = 2; + + Block4x4Encoding_RGB8A1(void); + virtual ~Block4x4Encoding_RGB8A1(void); + + virtual void Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + ErrorMetric a_errormetric) override; + + virtual void Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) override; + + virtual void PerformIteration(float a_fEffort) override; + + virtual void SetEncodingBits(void) override; + + void InitFromEncodingBits_ETC1(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, uint16_t iterationCount); + + void InitFromEncodingBits_T(void); + void InitFromEncodingBits_H(void); + + void PerformFirstIteration(void); + + void Decode_ETC1(void); + void DecodePixels_T(void); + void DecodePixels_H(void); + void SetEncodingBits_ETC1(void); + void SetEncodingBits_T(void); + void 
SetEncodingBits_H(void); + + private: + + bool m_boolOpaque; // all source pixels have alpha >= 0.5 + bool m_boolTransparent; // all source pixels have alpha < 0.5 + bool m_boolPunchThroughPixels; // some source pixels have alpha < 0.5 + + // pulled from ETC1 + static const unsigned int CW_BITS = 3; + static const unsigned int CW_RANGES = 1 << CW_BITS; + + static const unsigned int SELECTOR_BITS = 2; + static const unsigned int SELECTORS = 1 << SELECTOR_BITS; + + static float s_aafCwOpaqueUnsetTable[CW_RANGES][SELECTORS]; + + private: + + void TryDifferential(bool a_boolFlip, unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + void TryDifferentialHalf(DifferentialTrys::Half *a_phalf); + + void TryT(unsigned int a_uiRadius); + void TryT_BestSelectorCombination(void); + void TryH(unsigned int a_uiRadius); + void TryH_BestSelectorCombination(void); + + void TryDegenerates1(void); + void TryDegenerates2(void); + void TryDegenerates3(void); + void TryDegenerates4(void); + + void PerformIterationOpaque(float a_fEffort); + void PerformFirstIterationOpaque(void); + void PerformIterationTransparent(float); + }; + +// // ################################################################################ +// // Block4x4Encoding_RGB8A1_Opaque +// // RGB8A1 if all pixels have alpha==1 +// // ################################################################################ +// +// class Block4x4Encoding_RGB8A1_Opaque : public Block4x4Encoding_RGB8A1 +// { +// public: +// +// virtual void PerformIteration(float a_fEffort); +// +// void PerformFirstIteration(void); +// +// private: +// +// }; +// +// // ################################################################################ +// // Block4x4Encoding_RGB8A1_Transparent +// // RGB8A1 if all pixels have alpha==0 +// // ################################################################################ +// +// class Block4x4Encoding_RGB8A1_Transparent : public Block4x4Encoding_RGB8A1 +// { +// public: +// +// virtual 
void PerformIteration(float a_fEffort); +// +// private: +// +// }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp b/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp index 2c9fcdaa..33b08271 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.cpp @@ -1,552 +1,556 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcBlock4x4Encoding_RGBA8.cpp contains: - Block4x4Encoding_RGBA8 - Block4x4Encoding_RGBA8_Opaque - Block4x4Encoding_RGBA8_Transparent - -These encoders are used when targetting file format RGBA8. - -Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque -Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent -Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block - -*/ - -#include "EtcConfig.h" -#include "EtcBlock4x4Encoding_RGBA8.h" - -#include "EtcBlock4x4EncodingBits.h" -#include "EtcBlock4x4.h" - -#include -#include -#include -#include -#include -//#include - -namespace Etc -{ - template - T clamp(T value, T mn, T mx) { - return (value <= mn) ? mn : ((value >= mx) ? 
mx : value); - } - - // #################################################################################################### - // Block4x4Encoding_RGBA8 - // #################################################################################################### - - static const unsigned int MODIFIER_TABLE_ENTRYS = 16; - static const unsigned int ALPHA_SELECTOR_BITS = 3; - static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS; - - // same selector table used for R11/G11/A8 - static const int8_t s_aafModifierTable8[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS] - { - { -3, -6, -9, -15, 2, 5, 8, 14 }, - { -3, -7, -10, -13, 2, 6, 9, 12 }, - { -2, -5, -8, -13, 1, 4, 7, 12 }, - { -2, -4, -6, -13, 1, 3, 5, 12 }, - - { -3, -6, -8, -12, 2, 5, 7, 11 }, - { -3, -7, -9, -11, 2, 6, 8, 10 }, - { -4, -7, -8, -11, 3, 6, 7, 10 }, - { -3, -5, -8, -11, 2, 4, 7, 10 }, - - { -2, -6, -8, -10, 1, 5, 7, 9 }, - { -2, -5, -8, -10, 1, 4, 7, 9 }, - { -2, -4, -8, -10, 1, 3, 7, 9 }, - { -2, -5, -7, -10, 1, 4, 6, 9 }, - - { -3, -4, -7, -10, 2, 3, 6, 9 }, - { -1, -2, -3, -10, 0, 1, 2, 9 }, - { -4, -6, -8, -9, 3, 5, 7, 8 }, - { -3, -5, -7, -9, 2, 4, 6, 8 } - }; - - inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier, - unsigned int a_uiTableIndex, unsigned int a_uiSelector) - { - float fPixelAlpha = (a_fBase + - a_fMultiplier * s_aafModifierTable8[a_uiTableIndex][a_uiSelector]) / 255.0f; - if (fPixelAlpha < 0.0f) - { - fPixelAlpha = 0.0f; - } - else if (fPixelAlpha > 1.0f) - { - fPixelAlpha = 1.0f; - } - - return fPixelAlpha; - } - - inline int DecodePixelAlphaInt(int a_fBase, int a_fMultiplier, - unsigned int a_uiTableIndex, unsigned int a_uiSelector) - { - int fPixelAlpha = a_fBase + - a_fMultiplier * s_aafModifierTable8[a_uiTableIndex][a_uiSelector]; - - return clamp(fPixelAlpha, 0, 255); - } - - - - Block4x4Encoding_A8::Block4x4Encoding_A8(void) - { - m_pencodingbitsA8 = nullptr; - m_pafrgbaSource = nullptr; - } - - Block4x4Encoding_A8::~Block4x4Encoding_A8(void) {} - - void 
Block4x4Encoding_A8::Encode(const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, - Block4x4::SourceAlphaMix sourceAlphaMix) - { - m_pafrgbaSource = a_pafrgbaSource; - - m_boolDone = false; - - // really only care about error for one iteration - //m_fError = FLT_MAX; - - m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; - - if (sourceAlphaMix == Block4x4::SourceAlphaMix::OPAQUE) - { - // set the A8 portion - m_fBase = 255; - m_uiModifierTableIndex = 15; - m_fMultiplier = 15; - - // set all selectors to 7 (all bits set) - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiAlphaSelectors[uiPixel] = 7; - } - - m_boolDone = true; - } - else if ((sourceAlphaMix == Block4x4::SourceAlphaMix::ALL_ZERO_ALPHA) || - (sourceAlphaMix == Block4x4::SourceAlphaMix::TRANSPARENT)) - { - // set the A8 portion - m_fBase = 0; - m_uiModifierTableIndex = 0; - m_fMultiplier = 1; - - // set all selectors to 0 - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiAlphaSelectors[uiPixel] = 0; - } - - m_boolDone = true; - } - } - - // A8 always finished in one iterations, but error metrics on rgb iteration may need the alpha values - // in an error metric. Skip this if alpha not part of the metric. 
- void Block4x4Encoding_A8::Decode(unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource) - { - // Note: this is really just decoding to write this exact same data out - - m_pafrgbaSource = a_pafrgbaSource; // don't really need to hold this - m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; - - m_fBase = m_pencodingbitsA8->data.base; - m_fMultiplier = m_pencodingbitsA8->data.multiplier; - m_uiModifierTableIndex = m_pencodingbitsA8->data.table; - - uint64_t ulliSelectorBits = 0; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors0 << (uint64_t)40; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors1 << (uint64_t)32; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors2 << (uint64_t)24; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors3 << (uint64_t)16; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors4 << (uint64_t)8; - ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors5; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - uint64_t uiShift = 45 - (3 * uiPixel); - m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (uint64_t)(ALPHA_SELECTORS - 1); - } - - //Encode(a_pafrgbaSource, a_paucEncodingBits, sourceAlphaMix); - - // no iteration on A8, it's all done in after first PerformIteration - m_boolDone = true; - - // no error calc since this doesn't iterate, it's already resolved alpha - } - - void Block4x4Encoding_A8::DecodeAlpha(float* decodedPixels) - { -// m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; -// -// m_fBase = m_pencodingbitsA8->data.base; -// m_fMultiplier = m_pencodingbitsA8->data.multiplier; -// m_uiModifierTableIndex = m_pencodingbitsA8->data.table; -// -// uint64_t ulliSelectorBits = 0; -// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors0 << (uint64_t)40; -// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors1 << (uint64_t)32; -// ulliSelectorBits |= 
(uint64_t)m_pencodingbitsA8->data.selectors2 << (uint64_t)24; -// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors3 << (uint64_t)16; -// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors4 << (uint64_t)8; -// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors5; -// -// for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) -// { -// uint64_t uiShift = 45 - (3 * uiPixel); -// m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (uint64_t)(ALPHA_SELECTORS - 1); -// } - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - // this is float version of decode - float pixel = DecodePixelAlpha(m_fBase, m_fMultiplier, - m_uiModifierTableIndex, - m_auiAlphaSelectors[uiPixel]); - - decodedPixels[4 * uiPixel] = pixel; - } - } - - void Block4x4Encoding_A8::PerformIteration(float a_fEffort) - { - if (m_boolDone) - { - return; - } - - - // 0, 1, 2 pixel radius all done in iteration 0, only - // rgb is iterated on over multiple passes. - if (a_fEffort < 24.9f) - { - CalculateA8(0); - } - else if (a_fEffort < 49.9f) - { - CalculateA8(1); - } - else - { - CalculateA8(2); - } - - m_boolDone = true; - } - - void Block4x4Encoding_A8::CalculateA8(int a_fRadius) - { - float m_fError = FLT_MAX; - - // This code is similiar to CalculateR11. And it's all very slow doing brute force - // searches over a large nested for loop space. 
- uint8_t srcAlpha[PIXELS]; - - // find min/max alpha - int fMinAlpha = 255; - int fMaxAlpha = 0; - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int fAlpha = (int)roundf(255.0f * m_pafrgbaSource[uiPixel].fA); - if (fAlpha < fMinAlpha) - { - fMinAlpha = fAlpha; - } - if (fAlpha > fMaxAlpha) - { - fMaxAlpha = fAlpha; - } - - srcAlpha[uiPixel] = fAlpha; - } - - assert(fMinAlpha >= 0); - assert(fMaxAlpha <= 255); - assert(fMinAlpha <= fMaxAlpha); - - int fAlphaRange = fMaxAlpha - fMinAlpha; - - // fast path if range 0 (constant alpha), no point in all this iteration - if (fAlphaRange == 0) - { - a_fRadius = 0; - } - - // try each modifier table entry - //m_fError = FLT_MAX; // artificially high value - for (int uiTableEntry = 0; uiTableEntry < (int)MODIFIER_TABLE_ENTRYS; uiTableEntry++) - { - static const unsigned int MIN_VALUE_SELECTOR = 3; - static const unsigned int MAX_VALUE_SELECTOR = 7; - - int fTableEntryCenter = -s_aafModifierTable8[uiTableEntry][MIN_VALUE_SELECTOR]; - - int fTableEntryRange = s_aafModifierTable8[uiTableEntry][MAX_VALUE_SELECTOR] - - s_aafModifierTable8[uiTableEntry][MIN_VALUE_SELECTOR]; - - float fCenterRatio = fTableEntryCenter / (float)fTableEntryRange; - - int fCenterInt = (int)roundf(fMinAlpha + fCenterRatio * fAlphaRange); - //int fCenterInt = roundf(fCenter); - - int fMinBase = fCenterInt - a_fRadius; - int fMaxBase = fCenterInt + a_fRadius; - - if (fMinBase < 0) - { - fMinBase = 0; - } - if (fMaxBase > 255) - { - fMaxBase = 255; - } - - // 255 range / usp to 29 - int fRangeMultiplier = (int)roundf(fAlphaRange / (float)fTableEntryRange); - - int fMinMultiplier = clamp(fRangeMultiplier - a_fRadius, 1, 15); // no 0 case like on R11 - int fMaxMultiplier = clamp(fRangeMultiplier + a_fRadius, 1, 15); - - int auiBestSelectors[PIXELS]; - int afBestAlphaError[PIXELS]; - int afBestDecodedAlphas[PIXELS]; - - for (int fBase = fMinBase; fBase <= fMaxBase; fBase++) - { - for (int fMultiplier = fMinMultiplier; fMultiplier <= 
fMaxMultiplier; fMultiplier++) - { - // find best selector for each pixel - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - int fBestPixelAlphaError = 255 * 255; - for (int uiSelector = 0; uiSelector < (int)ALPHA_SELECTORS; uiSelector++) - { - int fDecodedAlpha = DecodePixelAlphaInt(fBase, fMultiplier, uiTableEntry, uiSelector); - - // pixelError = dA ^ 2 - int fPixelDeltaAlpha = fDecodedAlpha - (int)srcAlpha[uiPixel]; - int fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha; - - if (fPixelAlphaError < fBestPixelAlphaError) - { - fBestPixelAlphaError = fPixelAlphaError; - auiBestSelectors[uiPixel] = uiSelector; - afBestAlphaError[uiPixel] = fBestPixelAlphaError; - afBestDecodedAlphas[uiPixel] = fDecodedAlpha; - } - } - } - - // accumlate pixel error into block error, sum(da^2) - int fBlockError = 0; - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - fBlockError += afBestAlphaError[uiPixel]; - } - - if (m_fError > (float)fBlockError) - { - m_fError = (float)fBlockError; - - m_fBase = fBase; - m_fMultiplier = fMultiplier; - m_uiModifierTableIndex = uiTableEntry; - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel]; - - //m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel] / 255.0f; - } - - // stop the iteration if tolerance is low enough - const int kErrorTolerance = 16 * 1 * 1; - if (fBlockError <= kErrorTolerance) { - return; - } - } - } - } - - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state - // - void Block4x4Encoding_A8::SetEncodingBits(void) - { - // set the A8 portion - m_pencodingbitsA8->data.base = (uint8_t)roundf(/*255.0f * */ m_fBase); - m_pencodingbitsA8->data.table = m_uiModifierTableIndex; - m_pencodingbitsA8->data.multiplier = (uint8_t)roundf(m_fMultiplier); - - uint64_t ulliSelectorBits = 0; - for (int uiPixel = 0; uiPixel < PIXELS; 
uiPixel++) - { - uint64_t uiShift = 45 - (3 * uiPixel); - ulliSelectorBits |= ((uint64_t)m_auiAlphaSelectors[uiPixel]) << uiShift; - } - - m_pencodingbitsA8->data.selectors0 = uint32_t(ulliSelectorBits >> (uint64_t)40); - m_pencodingbitsA8->data.selectors1 = uint32_t(ulliSelectorBits >> (uint64_t)32); - m_pencodingbitsA8->data.selectors2 = uint32_t(ulliSelectorBits >> (uint64_t)24); - m_pencodingbitsA8->data.selectors3 = uint32_t(ulliSelectorBits >> (uint64_t)16); - m_pencodingbitsA8->data.selectors4 = uint32_t(ulliSelectorBits >> (uint64_t)8); - m_pencodingbitsA8->data.selectors5 = uint32_t(ulliSelectorBits); - } - - - // ---------------------------------------------------------------------------------------------------- - // - Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void) - { - } - Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {} - - // ---------------------------------------------------------------------------------------------------- - // initialization prior to encoding - // a_pblockParent points to the block associated with this encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits - // - void Block4x4Encoding_RGBA8::Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) - { - Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, 0); - - // RGB stored after A8 block - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + 8); - - // Only need alpha channel passed down - m_alpha.Encode(a_pafrgbaSource, a_paucEncodingBits, a_pblockParent->GetSourceAlphaMix()); - } - - // ---------------------------------------------------------------------------------------------------- - // initialization from the encoding bits of a previous encoding - // a_pblockParent points to the block associated with this 
encoding - // a_errormetric is used to choose the best encoding - // a_pafrgbaSource points to a 4x4 block subset of the source image - // a_paucEncodingBits points to the final encoding bits of a previous encoding - // - void Block4x4Encoding_RGBA8::Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) - { - // this won't iterate, but alpha values available for error calc - // but not using alpha in error calc anymore, so doing after RGB8 decode - m_alpha.Decode(a_paucEncodingBits, a_pafrgbaSource); - - m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + 8); - - // init RGB portion - Block4x4Encoding_RGB8::Decode(a_pblockParent, - (unsigned char *) m_pencodingbitsRGB8, - a_pafrgbaSource, - a_errormetric, - iterationCount); - } - - void Block4x4Encoding_RGBA8::DecodeAlpha() - { - // API hack toe be able to fill in the decodedPixels from the already Decode called alpha - // this is so regular Decode path doesn't do this decode and slow down multipass - m_alpha.DecodeAlpha(&m_afrgbaDecodedColors[0].fA); - } - - - // ---------------------------------------------------------------------------------------------------- - // perform a single encoding iteration - // replace the encoding if a better encoding was found - // subsequent iterations generally take longer for each iteration - // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort - // - // similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added - // - void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort) - { - // return if color and alpha done, note alpha only iterates on 0 - if (m_boolDone && m_alpha.IsDone() ) - { - return; - } - - if (m_uiEncodingIterations == 0) - { - m_alpha.PerformIteration(a_fEffort); - - // this skips writing out color too - if (m_pblockParent->GetSourceAlphaMix() == 
Block4x4::SourceAlphaMix::TRANSPARENT) - { - m_mode = MODE_ETC1; - m_boolDiff = true; - m_boolFlip = false; - - // none of these were cleared, like RGBA1 case - m_uiCW1 = 0; - m_uiCW2 = 0; - - m_frgbaColor1 = ColorFloatRGBA(); - m_frgbaColor2 = ColorFloatRGBA(); - - for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) - { - m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); // assumes rgb also 0 - //m_afDecodedAlphas[uiPixel] = 0.0f; - } - - m_fError = 0.0f; - - // skip processing rgb - m_boolDone = true; - //m_uiEncodingIterations++; - } - } - - if (!m_boolDone) - { - Block4x4Encoding_RGB8::PerformIteration(a_fEffort); - } - - } - - // ---------------------------------------------------------------------------------------------------- - // set the encoding bits based on encoding state - // - void Block4x4Encoding_RGBA8::SetEncodingBits(void) - { - // set the RGB8 portion - Block4x4Encoding_RGB8::SetEncodingBits(); - - m_alpha.SetEncodingBits(); - } -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +EtcBlock4x4Encoding_RGBA8.cpp contains: + Block4x4Encoding_RGBA8 + Block4x4Encoding_RGBA8_Opaque + Block4x4Encoding_RGBA8_Transparent + +These encoders are used when targetting file format RGBA8. 
+ +Block4x4Encoding_RGBA8_Opaque is used when all pixels in the 4x4 block are opaque +Block4x4Encoding_RGBA8_Transparent is used when all pixels in the 4x4 block are transparent +Block4x4Encoding_RGBA8 is used when there is a mixture of alphas in the 4x4 block + +*/ + +#include "EtcConfig.h" +#include "EtcBlock4x4Encoding_RGBA8.h" + +#include "EtcBlock4x4EncodingBits.h" +#include "EtcBlock4x4.h" + +#include +#include +#include +#include +#include +//#include + +namespace Etc +{ + template + T clamp(T value, T mn, T mx) { + return (value <= mn) ? mn : ((value >= mx) ? mx : value); + } + + // #################################################################################################### + // Block4x4Encoding_RGBA8 + // #################################################################################################### + + static const unsigned int MODIFIER_TABLE_ENTRYS = 16; + static const unsigned int ALPHA_SELECTOR_BITS = 3; + static const unsigned int ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS; + + // same selector table used for R11/G11/A8 + static const int8_t s_aafModifierTable8[MODIFIER_TABLE_ENTRYS][ALPHA_SELECTORS] + { + { -3, -6, -9, -15, 2, 5, 8, 14 }, + { -3, -7, -10, -13, 2, 6, 9, 12 }, + { -2, -5, -8, -13, 1, 4, 7, 12 }, + { -2, -4, -6, -13, 1, 3, 5, 12 }, + + { -3, -6, -8, -12, 2, 5, 7, 11 }, + { -3, -7, -9, -11, 2, 6, 8, 10 }, + { -4, -7, -8, -11, 3, 6, 7, 10 }, + { -3, -5, -8, -11, 2, 4, 7, 10 }, + + { -2, -6, -8, -10, 1, 5, 7, 9 }, + { -2, -5, -8, -10, 1, 4, 7, 9 }, + { -2, -4, -8, -10, 1, 3, 7, 9 }, + { -2, -5, -7, -10, 1, 4, 6, 9 }, + + { -3, -4, -7, -10, 2, 3, 6, 9 }, + { -1, -2, -3, -10, 0, 1, 2, 9 }, + { -4, -6, -8, -9, 3, 5, 7, 8 }, + { -3, -5, -7, -9, 2, 4, 6, 8 } + }; + + inline float DecodePixelAlpha(float a_fBase, float a_fMultiplier, + unsigned int a_uiTableIndex, unsigned int a_uiSelector) + { + float fPixelAlpha = (a_fBase + + a_fMultiplier * s_aafModifierTable8[a_uiTableIndex][a_uiSelector]) / 255.0f; + if (fPixelAlpha < 0.0f) + { 
+ fPixelAlpha = 0.0f; + } + else if (fPixelAlpha > 1.0f) + { + fPixelAlpha = 1.0f; + } + + return fPixelAlpha; + } + + inline int DecodePixelAlphaInt(int a_fBase, int a_fMultiplier, + unsigned int a_uiTableIndex, unsigned int a_uiSelector) + { + int fPixelAlpha = a_fBase + + a_fMultiplier * s_aafModifierTable8[a_uiTableIndex][a_uiSelector]; + + return clamp(fPixelAlpha, 0, 255); + } + + + + Block4x4Encoding_A8::Block4x4Encoding_A8(void) + { + m_pencodingbitsA8 = nullptr; + m_pafrgbaSource = nullptr; + } + + Block4x4Encoding_A8::~Block4x4Encoding_A8(void) {} + + void Block4x4Encoding_A8::Encode(const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + Block4x4::SourceAlphaMix sourceAlphaMix) + { + m_pafrgbaSource = a_pafrgbaSource; + + m_boolDone = false; + + // really only care about error for one iteration + //m_fError = FLT_MAX; + + m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; + + if (sourceAlphaMix == Block4x4::SourceAlphaMix::OPAQUE) + { + // set the A8 portion + m_fBase = 255; + m_uiModifierTableIndex = 15; + m_fMultiplier = 15; + + // set all selectors to 7 (all bits set) + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiAlphaSelectors[uiPixel] = 7; + } + + m_boolDone = true; + } + else if ((sourceAlphaMix == Block4x4::SourceAlphaMix::ALL_ZERO_ALPHA) || + (sourceAlphaMix == Block4x4::SourceAlphaMix::TRANSPARENT)) + { + // set the A8 portion + m_fBase = 0; + m_uiModifierTableIndex = 0; + m_fMultiplier = 1; + + // set all selectors to 0 + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiAlphaSelectors[uiPixel] = 0; + } + + m_boolDone = true; + } + } + + // A8 always finished in one iterations, but error metrics on rgb iteration may need the alpha values + // in an error metric. Skip this if alpha not part of the metric. 
+ void Block4x4Encoding_A8::Decode(unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource) + { + // Note: this is really just decoding to write this exact same data out + + m_pafrgbaSource = a_pafrgbaSource; // don't really need to hold this + m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; + + m_fBase = m_pencodingbitsA8->data.base; + m_fMultiplier = m_pencodingbitsA8->data.multiplier; + m_uiModifierTableIndex = m_pencodingbitsA8->data.table; + + uint64_t ulliSelectorBits = 0; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors0 << (uint64_t)40; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors1 << (uint64_t)32; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors2 << (uint64_t)24; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors3 << (uint64_t)16; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors4 << (uint64_t)8; + ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors5; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + uint64_t uiShift = 45 - (3 * uiPixel); + m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (uint64_t)(ALPHA_SELECTORS - 1); + } + + //Encode(a_pafrgbaSource, a_paucEncodingBits, sourceAlphaMix); + + // no iteration on A8, it's all done in after first PerformIteration + m_boolDone = true; + + // no error calc since this doesn't iterate, it's already resolved alpha + } + + void Block4x4Encoding_A8::DecodeAlpha(float* decodedPixels) + { +// m_pencodingbitsA8 = (Block4x4EncodingBits_A8 *)a_paucEncodingBits; +// +// m_fBase = m_pencodingbitsA8->data.base; +// m_fMultiplier = m_pencodingbitsA8->data.multiplier; +// m_uiModifierTableIndex = m_pencodingbitsA8->data.table; +// +// uint64_t ulliSelectorBits = 0; +// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors0 << (uint64_t)40; +// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors1 << (uint64_t)32; +// ulliSelectorBits |= 
(uint64_t)m_pencodingbitsA8->data.selectors2 << (uint64_t)24; +// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors3 << (uint64_t)16; +// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors4 << (uint64_t)8; +// ulliSelectorBits |= (uint64_t)m_pencodingbitsA8->data.selectors5; +// +// for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) +// { +// uint64_t uiShift = 45 - (3 * uiPixel); +// m_auiAlphaSelectors[uiPixel] = (ulliSelectorBits >> uiShift) & (uint64_t)(ALPHA_SELECTORS - 1); +// } + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + // this is float version of decode + float pixel = DecodePixelAlpha(m_fBase, m_fMultiplier, + m_uiModifierTableIndex, + m_auiAlphaSelectors[uiPixel]); + + decodedPixels[4 * uiPixel] = pixel; + } + } + + void Block4x4Encoding_A8::PerformIteration(float a_fEffort) + { + if (m_boolDone) + { + return; + } + + + // 0, 1, 2 pixel radius all done in iteration 0, only + // rgb is iterated on over multiple passes. + if (a_fEffort < 24.9f) + { + CalculateA8(0); + } + else if (a_fEffort < 49.9f) + { + CalculateA8(1); + } + else + { + CalculateA8(2); + } + + m_boolDone = true; + } + + void Block4x4Encoding_A8::CalculateA8(int a_fRadius) + { + float m_fError = FLT_MAX; + + // This code is similiar to CalculateR11. And it's all very slow doing brute force + // searches over a large nested for loop space. 
+ uint8_t srcAlpha[PIXELS]; + + // find min/max alpha + int fMinAlpha = 255; + int fMaxAlpha = 0; + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int fAlpha = (int)roundf(255.0f * m_pafrgbaSource[uiPixel].fA); + if (fAlpha < fMinAlpha) + { + fMinAlpha = fAlpha; + } + if (fAlpha > fMaxAlpha) + { + fMaxAlpha = fAlpha; + } + + srcAlpha[uiPixel] = fAlpha; + } + + assert(fMinAlpha >= 0); + assert(fMaxAlpha <= 255); + assert(fMinAlpha <= fMaxAlpha); + + int fAlphaRange = fMaxAlpha - fMinAlpha; + + // fast path if range 0 (constant alpha), no point in all this iteration + if (fAlphaRange == 0) + { + a_fRadius = 0; + } + + // try each modifier table entry + //m_fError = FLT_MAX; // artificially high value + for (int uiTableEntry = 0; uiTableEntry < (int)MODIFIER_TABLE_ENTRYS; uiTableEntry++) + { + static const unsigned int MIN_VALUE_SELECTOR = 3; + static const unsigned int MAX_VALUE_SELECTOR = 7; + + int fTableEntryCenter = -s_aafModifierTable8[uiTableEntry][MIN_VALUE_SELECTOR]; + + int fTableEntryRange = s_aafModifierTable8[uiTableEntry][MAX_VALUE_SELECTOR] - + s_aafModifierTable8[uiTableEntry][MIN_VALUE_SELECTOR]; + + float fCenterRatio = fTableEntryCenter / (float)fTableEntryRange; + + int fCenterInt = (int)roundf(fMinAlpha + fCenterRatio * fAlphaRange); + //int fCenterInt = roundf(fCenter); + + int fMinBase = fCenterInt - a_fRadius; + int fMaxBase = fCenterInt + a_fRadius; + + if (fMinBase < 0) + { + fMinBase = 0; + } + if (fMaxBase > 255) + { + fMaxBase = 255; + } + + // 255 range / usp to 29 + int fRangeMultiplier = (int)roundf(fAlphaRange / (float)fTableEntryRange); + + int fMinMultiplier = clamp(fRangeMultiplier - a_fRadius, 1, 15); // no 0 case like on R11 + int fMaxMultiplier = clamp(fRangeMultiplier + a_fRadius, 1, 15); + + int auiBestSelectors[PIXELS]; + int afBestAlphaError[PIXELS]; + int afBestDecodedAlphas[PIXELS]; + + for (int fBase = fMinBase; fBase <= fMaxBase; fBase++) + { + for (int fMultiplier = fMinMultiplier; fMultiplier <= 
fMaxMultiplier; fMultiplier++) + { + // find best selector for each pixel + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + int fBestPixelAlphaError = 255 * 255; + for (int uiSelector = 0; uiSelector < (int)ALPHA_SELECTORS; uiSelector++) + { + int fDecodedAlpha = DecodePixelAlphaInt(fBase, fMultiplier, uiTableEntry, uiSelector); + + // pixelError = dA ^ 2 + int fPixelDeltaAlpha = fDecodedAlpha - (int)srcAlpha[uiPixel]; + int fPixelAlphaError = fPixelDeltaAlpha * fPixelDeltaAlpha; + + if (fPixelAlphaError < fBestPixelAlphaError) + { + fBestPixelAlphaError = fPixelAlphaError; + auiBestSelectors[uiPixel] = uiSelector; + afBestAlphaError[uiPixel] = fBestPixelAlphaError; + afBestDecodedAlphas[uiPixel] = fDecodedAlpha; + } + } + } + + // accumlate pixel error into block error, sum(da^2) + int fBlockError = 0; + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + fBlockError += afBestAlphaError[uiPixel]; + } + + if (m_fError > (float)fBlockError) + { + m_fError = (float)fBlockError; + + m_fBase = fBase; + m_fMultiplier = fMultiplier; + m_uiModifierTableIndex = uiTableEntry; + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_auiAlphaSelectors[uiPixel] = auiBestSelectors[uiPixel]; + + //m_afDecodedAlphas[uiPixel] = afBestDecodedAlphas[uiPixel] / 255.0f; + } + + // stop the iteration if tolerance is low enough + const int kErrorTolerance = 16 * 1 * 1; + if (fBlockError <= kErrorTolerance) { + return; + } + } + } + } + + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_A8::SetEncodingBits(void) + { + // set the A8 portion + m_pencodingbitsA8->data.base = (uint8_t)roundf(/*255.0f * */ m_fBase); + m_pencodingbitsA8->data.table = m_uiModifierTableIndex; + m_pencodingbitsA8->data.multiplier = (uint8_t)roundf(m_fMultiplier); + + uint64_t ulliSelectorBits = 0; + for (int uiPixel = 0; uiPixel < PIXELS; 
uiPixel++) + { + uint64_t uiShift = 45 - (3 * uiPixel); + ulliSelectorBits |= ((uint64_t)m_auiAlphaSelectors[uiPixel]) << uiShift; + } + + m_pencodingbitsA8->data.selectors0 = uint32_t(ulliSelectorBits >> (uint64_t)40); + m_pencodingbitsA8->data.selectors1 = uint32_t(ulliSelectorBits >> (uint64_t)32); + m_pencodingbitsA8->data.selectors2 = uint32_t(ulliSelectorBits >> (uint64_t)24); + m_pencodingbitsA8->data.selectors3 = uint32_t(ulliSelectorBits >> (uint64_t)16); + m_pencodingbitsA8->data.selectors4 = uint32_t(ulliSelectorBits >> (uint64_t)8); + m_pencodingbitsA8->data.selectors5 = uint32_t(ulliSelectorBits); + } + + + // ---------------------------------------------------------------------------------------------------- + // + Block4x4Encoding_RGBA8::Block4x4Encoding_RGBA8(void) + { + } + Block4x4Encoding_RGBA8::~Block4x4Encoding_RGBA8(void) {} + + // ---------------------------------------------------------------------------------------------------- + // initialization prior to encoding + // a_pblockParent points to the block associated with this encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits + // + void Block4x4Encoding_RGBA8::Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) + { + Block4x4Encoding::Init(a_pblockParent, a_pafrgbaSource, a_errormetric, 0); + + // RGB stored after A8 block + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + 8); + + // Only need alpha channel passed down + m_alpha.Encode(a_pafrgbaSource, a_paucEncodingBits, a_pblockParent->GetSourceAlphaMix()); + } + + // ---------------------------------------------------------------------------------------------------- + // initialization from the encoding bits of a previous encoding + // a_pblockParent points to the block associated with this 
encoding + // a_errormetric is used to choose the best encoding + // a_pafrgbaSource points to a 4x4 block subset of the source image + // a_paucEncodingBits points to the final encoding bits of a previous encoding + // + void Block4x4Encoding_RGBA8::Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) + { + // this won't iterate, but alpha values available for error calc + // but not using alpha in error calc anymore, so doing after RGB8 decode + m_alpha.Decode(a_paucEncodingBits, a_pafrgbaSource); + + m_pencodingbitsRGB8 = (Block4x4EncodingBits_RGB8 *)(a_paucEncodingBits + 8); + + // init RGB portion + Block4x4Encoding_RGB8::Decode(a_pblockParent, + (unsigned char *) m_pencodingbitsRGB8, + a_pafrgbaSource, + a_errormetric, + iterationCount); + } + + void Block4x4Encoding_RGBA8::DecodeAlpha() + { + // API hack toe be able to fill in the decodedPixels from the already Decode called alpha + // this is so regular Decode path doesn't do this decode and slow down multipass + m_alpha.DecodeAlpha(&m_afrgbaDecodedColors[0].fA); + } + + + // ---------------------------------------------------------------------------------------------------- + // perform a single encoding iteration + // replace the encoding if a better encoding was found + // subsequent iterations generally take longer for each iteration + // set m_boolDone if encoding is perfect or encoding is finished based on a_fEffort + // + // similar to Block4x4Encoding_RGB8_Base::Encode_RGB8(), but with alpha added + // + void Block4x4Encoding_RGBA8::PerformIteration(float a_fEffort) + { + // return if color and alpha done, note alpha only iterates on 0 + if (m_boolDone && m_alpha.IsDone() ) + { + return; + } + + if (m_uiEncodingIterations == 0) + { + m_alpha.PerformIteration(a_fEffort); + + /* TODO: can only do this if color if encoding premul color + but kram already knocks out all the color channels in 
this cae + + // this skips writing out color too + if (m_pblockParent->GetSourceAlphaMix() == Block4x4::SourceAlphaMix::TRANSPARENT) + { + m_mode = MODE_ETC1; + m_boolDiff = true; + m_boolFlip = false; + + // none of these were cleared, like RGBA1 case + m_uiCW1 = 0; + m_uiCW2 = 0; + + m_frgbaColor1 = ColorFloatRGBA(); + m_frgbaColor2 = ColorFloatRGBA(); + + for (int uiPixel = 0; uiPixel < PIXELS; uiPixel++) + { + m_afrgbaDecodedColors[uiPixel] = ColorFloatRGBA(); // assumes rgb also 0 + //m_afDecodedAlphas[uiPixel] = 0.0f; + } + + m_fError = 0.0f; + + // skip processing rgb + m_boolDone = true; + //m_uiEncodingIterations++; + } + */ + } + + if (!m_boolDone) + { + Block4x4Encoding_RGB8::PerformIteration(a_fEffort); + } + + } + + // ---------------------------------------------------------------------------------------------------- + // set the encoding bits based on encoding state + // + void Block4x4Encoding_RGBA8::SetEncodingBits(void) + { + // set the RGB8 portion + Block4x4Encoding_RGB8::SetEncodingBits(); + + m_alpha.SetEncodingBits(); + } +} diff --git a/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.h b/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.h index 9602ff0b..7439fea8 100644 --- a/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.h +++ b/libkram/etc2comp/EtcBlock4x4Encoding_RGBA8.h @@ -1,100 +1,100 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcBlock4x4Encoding_RGB8.h" - -#include "EtcBlock4x4.h" // for SourceAlphaMix - -namespace Etc -{ - class Block4x4EncodingBits_A8; - - // ################################################################################ - // Block4x4Encoding_RGBA8 - // RGBA8 if not completely opaque or transparent - // ################################################################################ - - // Encoder for the A8 portion of RGBA. Minimizes error in a single iteration. - class Block4x4Encoding_A8 - { - public: - Block4x4Encoding_A8(void); - ~Block4x4Encoding_A8(void); - - void Encode(const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, - Block4x4::SourceAlphaMix sourceAlphaMix); - - void Decode(unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource); - - void DecodeAlpha(float *decodedPixels); - - void PerformIteration(float a_fEffort); - void CalculateA8(int a_fRadius); - void SetEncodingBits(void); - - bool IsDone() const { return m_boolDone; } - - private: - static const int PIXELS = 16; - - Block4x4EncodingBits_A8 *m_pencodingbitsA8; - - // float* m_afDecodedAlphas; // alias to parent array - //Block4x4::SourceAlphaMix m_sourceAlphaMix; - - const ColorFloatRGBA* m_pafrgbaSource; - - uint8_t m_fBase; - uint8_t m_fMultiplier; - uint8_t m_uiModifierTableIndex; - uint8_t m_auiAlphaSelectors[PIXELS]; - - bool m_boolDone; - }; - - // This basically combines RGBA8 encoder with A8 encoder - class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8 - { - public: - - Block4x4Encoding_RGBA8(void); - virtual ~Block4x4Encoding_RGBA8(void); - - virtual void Encode(Block4x4 *a_pblockParent, - const ColorFloatRGBA *a_pafrgbaSource, - unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) override; - - virtual void Decode(Block4x4 *a_pblockParent, - unsigned char *a_paucEncodingBits, - const ColorFloatRGBA *a_pafrgbaSource, - ErrorMetric a_errormetric, - uint16_t iterationCount) override; - - virtual 
void DecodeAlpha() override; - - virtual void PerformIteration(float a_fEffort) override; - - virtual void SetEncodingBits(void) override; - - private: - Block4x4Encoding_A8 m_alpha; - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcBlock4x4Encoding_RGB8.h" + +#include "EtcBlock4x4.h" // for SourceAlphaMix + +namespace Etc +{ + class Block4x4EncodingBits_A8; + + // ################################################################################ + // Block4x4Encoding_RGBA8 + // RGBA8 if not completely opaque or transparent + // ################################################################################ + + // Encoder for the A8 portion of RGBA. Minimizes error in a single iteration. 
+ class Block4x4Encoding_A8 + { + public: + Block4x4Encoding_A8(void); + ~Block4x4Encoding_A8(void); + + void Encode(const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, + Block4x4::SourceAlphaMix sourceAlphaMix); + + void Decode(unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource); + + void DecodeAlpha(float *decodedPixels); + + void PerformIteration(float a_fEffort); + void CalculateA8(int a_fRadius); + void SetEncodingBits(void); + + bool IsDone() const { return m_boolDone; } + + private: + static const int PIXELS = 16; + + Block4x4EncodingBits_A8 *m_pencodingbitsA8; + + // float* m_afDecodedAlphas; // alias to parent array + //Block4x4::SourceAlphaMix m_sourceAlphaMix; + + const ColorFloatRGBA* m_pafrgbaSource; + + uint8_t m_fBase; + uint8_t m_fMultiplier; + uint8_t m_uiModifierTableIndex; + uint8_t m_auiAlphaSelectors[PIXELS]; + + bool m_boolDone; + }; + + // This basically combines RGBA8 encoder with A8 encoder + class Block4x4Encoding_RGBA8 : public Block4x4Encoding_RGB8 + { + public: + + Block4x4Encoding_RGBA8(void); + virtual ~Block4x4Encoding_RGBA8(void); + + virtual void Encode(Block4x4 *a_pblockParent, + const ColorFloatRGBA *a_pafrgbaSource, + unsigned char *a_paucEncodingBits, ErrorMetric a_errormetric) override; + + virtual void Decode(Block4x4 *a_pblockParent, + unsigned char *a_paucEncodingBits, + const ColorFloatRGBA *a_pafrgbaSource, + ErrorMetric a_errormetric, + uint16_t iterationCount) override; + + virtual void DecodeAlpha() override; + + virtual void PerformIteration(float a_fEffort) override; + + virtual void SetEncodingBits(void) override; + + private: + Block4x4Encoding_A8 m_alpha; + }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcColor.h b/libkram/etc2comp/EtcColor.h index a4c40fb9..fff15cf0 100644 --- a/libkram/etc2comp/EtcColor.h +++ b/libkram/etc2comp/EtcColor.h @@ -1,66 +1,66 @@ -/* - * Copyright 2015 The Etc2Comp Authors. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -#pragma once - -#include - -namespace Etc -{ - - inline float LogToLinear(float a_fLog) - { - static const float ALPHA = 0.055f; - static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; - - if (a_fLog <= 0.04045f) - { - return a_fLog / 12.92f; - } - else - { - return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f); - } - } - - inline float LinearToLog(float a_fLinear) - { - static const float ALPHA = 0.055f; - static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; - - if (a_fLinear <= 0.0031308f) - { - return 12.92f * a_fLinear; - } - else - { - return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA; - } - } - - class ColorR8G8B8A8 - { - public: - - unsigned char ucR; - unsigned char ucG; - unsigned char ucB; - unsigned char ucA; - - }; -} -*/ +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +#pragma once + +#include + +namespace Etc +{ + + inline float LogToLinear(float a_fLog) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLog <= 0.04045f) + { + return a_fLog / 12.92f; + } + else + { + return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f); + } + } + + inline float LinearToLog(float a_fLinear) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLinear <= 0.0031308f) + { + return 12.92f * a_fLinear; + } + else + { + return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA; + } + } + + class ColorR8G8B8A8 + { + public: + + unsigned char ucR; + unsigned char ucG; + unsigned char ucB; + unsigned char ucA; + + }; +} +*/ diff --git a/libkram/etc2comp/EtcColorFloatRGBA.h b/libkram/etc2comp/EtcColorFloatRGBA.h index 162debc5..d387763c 100644 --- a/libkram/etc2comp/EtcColorFloatRGBA.h +++ b/libkram/etc2comp/EtcColorFloatRGBA.h @@ -1,316 +1,316 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcConfig.h" -//#include "EtcColor.h" - -#include - -namespace Etc -{ - inline float LogToLinear(float a_fLog) - { - static const float ALPHA = 0.055f; - static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; - - if (a_fLog <= 0.04045f) - { - return a_fLog / 12.92f; - } - else - { - return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f); - } - } - - inline float LinearToLog(float a_fLinear) - { - static const float ALPHA = 0.055f; - static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; - - if (a_fLinear <= 0.0031308f) - { - return 12.92f * a_fLinear; - } - else - { - return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA; - } - } - - class ColorR8G8B8A8 - { - public: - - uint8_t ucR; - uint8_t ucG; - uint8_t ucB; - uint8_t ucA; - - }; - - class ColorFloatRGBA - { - public: - - ColorFloatRGBA(void) - { - fR = fG = fB = fA = 0.0f; - } - - ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA) - { - fR = a_fR; - fG = a_fG; - fB = a_fB; - fA = a_fA; - } - - inline ColorFloatRGBA operator+(const ColorFloatRGBA& a_rfrgba) const - { - ColorFloatRGBA frgba; - frgba.fR = fR + a_rfrgba.fR; - frgba.fG = fG + a_rfrgba.fG; - frgba.fB = fB + a_rfrgba.fB; - frgba.fA = fA + a_rfrgba.fA; - return frgba; - } - - inline ColorFloatRGBA operator-(const ColorFloatRGBA& a_rfrgba) const - { - ColorFloatRGBA frgba; - frgba.fR = fR - a_rfrgba.fR; - frgba.fG = fG - a_rfrgba.fG; - frgba.fB = fB - a_rfrgba.fB; - frgba.fA = fA - a_rfrgba.fA; - return frgba; - } - - // scalar ops don't apply to alpha - inline ColorFloatRGBA operator+(float a_f) const - { - ColorFloatRGBA frgba; - frgba.fR = fR + a_f; - frgba.fG = fG + a_f; - frgba.fB = fB + a_f; - frgba.fA = fA; - return frgba; - } - - // scalar ops don't apply to alpha - inline ColorFloatRGBA operator-(float a_f) const - { - return *this + (-a_f); - } - - - // scalar ops don't apply to alpha - inline ColorFloatRGBA operator*(float a_f) const - { - return ScaleRGB(a_f); - } - - inline ColorFloatRGBA 
ScaleRGB(float a_f) const - { - ColorFloatRGBA frgba; - frgba.fR = fR * a_f; - frgba.fG = fG * a_f; - frgba.fB = fB * a_f; - frgba.fA = fA; - - return frgba; - } - - inline ColorFloatRGBA RoundRGB(void) const - { - ColorFloatRGBA frgba; - frgba.fR = roundf(fR); - frgba.fG = roundf(fG); - frgba.fB = roundf(fB); - frgba.fA = fA; // was missing in original - - return frgba; - } - - inline ColorFloatRGBA ToLinear() const - { - ColorFloatRGBA frgbaLinear; - frgbaLinear.fR = LogToLinear(fR); - frgbaLinear.fG = LogToLinear(fG); - frgbaLinear.fB = LogToLinear(fB); - frgbaLinear.fA = fA; - - return frgbaLinear; - } - - inline ColorFloatRGBA ToLog(void) const - { - ColorFloatRGBA frgbaLog; - frgbaLog.fR = LinearToLog(fR); - frgbaLog.fG = LinearToLog(fG); - frgbaLog.fB = LinearToLog(fB); - frgbaLog.fA = fA; - - return frgbaLog; - } - - inline static ColorFloatRGBA ConvertFromRGBA8(uint8_t a_ucR, - uint8_t a_ucG, uint8_t a_ucB, uint8_t a_ucA) - { - ColorFloatRGBA frgba; - - frgba.fR = (float)a_ucR / 255.0f; - frgba.fG = (float)a_ucG / 255.0f; - frgba.fB = (float)a_ucB / 255.0f; - frgba.fA = (float)a_ucA / 255.0f; - - return frgba; - } - - inline static ColorFloatRGBA ConvertFromRGBA8(const ColorR8G8B8A8& color) - { - return ConvertFromRGBA8(color.ucR, color.ucG, color.ucB, color.ucA); - } - - inline static ColorFloatRGBA ConvertFromRGB4(uint8_t a_ucR4, - uint8_t a_ucG4, - uint8_t a_ucB4, uint8_t a_ucA = 255) - { - uint8_t ucR8 = (uint8_t)((a_ucR4 << 4) + a_ucR4); - uint8_t ucG8 = (uint8_t)((a_ucG4 << 4) + a_ucG4); - uint8_t ucB8 = (uint8_t)((a_ucB4 << 4) + a_ucB4); - - return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); - } - - inline static ColorFloatRGBA ConvertFromRGB5(uint8_t a_ucR5, - uint8_t a_ucG5, - uint8_t a_ucB5, uint8_t a_ucA = 255) - { - uint8_t ucR8 = (uint8_t)((a_ucR5 << 3) + (a_ucR5 >> 2)); - uint8_t ucG8 = (uint8_t)((a_ucG5 << 3) + (a_ucG5 >> 2)); - uint8_t ucB8 = (uint8_t)((a_ucB5 << 3) + (a_ucB5 >> 2)); - - return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); - } 
- - inline static ColorFloatRGBA ConvertFromR6G7B6(uint8_t a_ucR6, - uint8_t a_ucG7, - uint8_t a_ucB6, uint8_t a_ucA = 255) - { - uint8_t ucR8 = (uint8_t)((a_ucR6 << 2) + (a_ucR6 >> 4)); - uint8_t ucG8 = (uint8_t)((a_ucG7 << 1) + (a_ucG7 >> 6)); - uint8_t ucB8 = (uint8_t)((a_ucB6 << 2) + (a_ucB6 >> 4)); - - return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); - } - - // quantize to 4 bits, expand to 8 bits - inline ColorFloatRGBA QuantizeR4G4B4(void) const - { - ColorFloatRGBA frgba = ClampRGB(); - - // quantize to 4 bits - frgba = frgba.ScaleRGB(15.0f).RoundRGB(); - uint32_t uiR4 = (uint32_t)frgba.fR; - uint32_t uiG4 = (uint32_t)frgba.fG; - uint32_t uiB4 = (uint32_t)frgba.fB; - - frgba = ConvertFromRGB4(uiR4, uiG4, uiB4); - frgba.fA = fA; - - return frgba; - } - - // quantize to 5 bits, expand to 8 bits - inline ColorFloatRGBA QuantizeR5G5B5(void) const - { - ColorFloatRGBA frgba = ClampRGBA(); - - // quantize to 5 bits - frgba = frgba.ScaleRGB(31.0f).RoundRGB(); - uint32_t uiR5 = (uint32_t)frgba.fR; - uint32_t uiG5 = (uint32_t)frgba.fG; - uint32_t uiB5 = (uint32_t)frgba.fB; - - frgba = ConvertFromRGB5(uiR5, uiG5, uiB5); - frgba.fA = fA; - return frgba; - } - - // quantize to 6/7/6 bits, expand to 8 bits - inline ColorFloatRGBA QuantizeR6G7B6(void) const - { - ColorFloatRGBA frgba = ClampRGBA(); - - // quantize to 6/7/6 bits - uint32_t uiR6 = (uint32_t)frgba.IntRed(63.0f); - uint32_t uiG7 = (uint32_t)frgba.IntGreen(127.0f); - uint32_t uiB6 = (uint32_t)frgba.IntBlue(63.0f); - - frgba = ConvertFromR6G7B6(uiR6, uiG7, uiB6); - frgba.fA = fA; - - return frgba; - } - - inline ColorFloatRGBA ClampRGB(void) const - { - return ClampRGBA(); - } - - inline ColorFloatRGBA ClampRGBA(void) const - { - ColorFloatRGBA frgba = *this; - if (frgba.fR < 0.0f) { frgba.fR = 0.0f; } - if (frgba.fR > 1.0f) { frgba.fR = 1.0f; } - if (frgba.fG < 0.0f) { frgba.fG = 0.0f; } - if (frgba.fG > 1.0f) { frgba.fG = 1.0f; } - if (frgba.fB < 0.0f) { frgba.fB = 0.0f; } - if (frgba.fB > 1.0f) { frgba.fB 
= 1.0f; } - if (frgba.fA < 0.0f) { frgba.fA = 0.0f; } - if (frgba.fA > 1.0f) { frgba.fA = 1.0f; } - - return frgba; - } - - inline int IntRed(float a_fScale) const - { - return (int)roundf(fR * a_fScale); - } - - inline int IntGreen(float a_fScale) const - { - return (int)roundf(fG * a_fScale); - } - - inline int IntBlue(float a_fScale) const - { - return (int)roundf(fB * a_fScale); - } - - inline int IntAlpha(float a_fScale) const - { - return (int)roundf(fA * a_fScale); - } - - float fR, fG, fB, fA; - }; - -} - +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "EtcConfig.h" +//#include "EtcColor.h" + +#include + +namespace Etc +{ + inline float LogToLinear(float a_fLog) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLog <= 0.04045f) + { + return a_fLog / 12.92f; + } + else + { + return powf((a_fLog + ALPHA) / ONE_PLUS_ALPHA, 2.4f); + } + } + + inline float LinearToLog(float a_fLinear) + { + static const float ALPHA = 0.055f; + static const float ONE_PLUS_ALPHA = 1.0f + ALPHA; + + if (a_fLinear <= 0.0031308f) + { + return 12.92f * a_fLinear; + } + else + { + return ONE_PLUS_ALPHA * powf(a_fLinear, (1.0f/2.4f)) - ALPHA; + } + } + + class ColorR8G8B8A8 + { + public: + + uint8_t ucR; + uint8_t ucG; + uint8_t ucB; + uint8_t ucA; + + }; + + class ColorFloatRGBA + { + public: + + ColorFloatRGBA(void) + { + fR = fG = fB = fA = 0.0f; + } + + ColorFloatRGBA(float a_fR, float a_fG, float a_fB, float a_fA) + { + fR = a_fR; + fG = a_fG; + fB = a_fB; + fA = a_fA; + } + + inline ColorFloatRGBA operator+(const ColorFloatRGBA& a_rfrgba) const + { + ColorFloatRGBA frgba; + frgba.fR = fR + a_rfrgba.fR; + frgba.fG = fG + a_rfrgba.fG; + frgba.fB = fB + a_rfrgba.fB; + frgba.fA = fA + a_rfrgba.fA; + return frgba; + } + + inline ColorFloatRGBA operator-(const ColorFloatRGBA& a_rfrgba) const + { + ColorFloatRGBA frgba; + frgba.fR = fR - a_rfrgba.fR; + frgba.fG = fG - a_rfrgba.fG; + frgba.fB = fB - a_rfrgba.fB; + frgba.fA = fA - a_rfrgba.fA; + return frgba; + } + + // scalar ops don't apply to alpha + inline ColorFloatRGBA operator+(float a_f) const + { + ColorFloatRGBA frgba; + frgba.fR = fR + a_f; + frgba.fG = fG + a_f; + frgba.fB = fB + a_f; + frgba.fA = fA; + return frgba; + } + + // scalar ops don't apply to alpha + inline ColorFloatRGBA operator-(float a_f) const + { + return *this + (-a_f); + } + + + // scalar ops don't apply to alpha + inline ColorFloatRGBA operator*(float a_f) const + { + return ScaleRGB(a_f); + } + + inline ColorFloatRGBA 
ScaleRGB(float a_f) const + { + ColorFloatRGBA frgba; + frgba.fR = fR * a_f; + frgba.fG = fG * a_f; + frgba.fB = fB * a_f; + frgba.fA = fA; + + return frgba; + } + + inline ColorFloatRGBA RoundRGB(void) const + { + ColorFloatRGBA frgba; + frgba.fR = roundf(fR); + frgba.fG = roundf(fG); + frgba.fB = roundf(fB); + frgba.fA = fA; // was missing in original + + return frgba; + } + + inline ColorFloatRGBA ToLinear() const + { + ColorFloatRGBA frgbaLinear; + frgbaLinear.fR = LogToLinear(fR); + frgbaLinear.fG = LogToLinear(fG); + frgbaLinear.fB = LogToLinear(fB); + frgbaLinear.fA = fA; + + return frgbaLinear; + } + + inline ColorFloatRGBA ToLog(void) const + { + ColorFloatRGBA frgbaLog; + frgbaLog.fR = LinearToLog(fR); + frgbaLog.fG = LinearToLog(fG); + frgbaLog.fB = LinearToLog(fB); + frgbaLog.fA = fA; + + return frgbaLog; + } + + inline static ColorFloatRGBA ConvertFromRGBA8(uint8_t a_ucR, + uint8_t a_ucG, uint8_t a_ucB, uint8_t a_ucA) + { + ColorFloatRGBA frgba; + + frgba.fR = (float)a_ucR / 255.0f; + frgba.fG = (float)a_ucG / 255.0f; + frgba.fB = (float)a_ucB / 255.0f; + frgba.fA = (float)a_ucA / 255.0f; + + return frgba; + } + + inline static ColorFloatRGBA ConvertFromRGBA8(const ColorR8G8B8A8& color) + { + return ConvertFromRGBA8(color.ucR, color.ucG, color.ucB, color.ucA); + } + + inline static ColorFloatRGBA ConvertFromRGB4(uint8_t a_ucR4, + uint8_t a_ucG4, + uint8_t a_ucB4, uint8_t a_ucA = 255) + { + uint8_t ucR8 = (uint8_t)((a_ucR4 << 4) + a_ucR4); + uint8_t ucG8 = (uint8_t)((a_ucG4 << 4) + a_ucG4); + uint8_t ucB8 = (uint8_t)((a_ucB4 << 4) + a_ucB4); + + return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); + } + + inline static ColorFloatRGBA ConvertFromRGB5(uint8_t a_ucR5, + uint8_t a_ucG5, + uint8_t a_ucB5, uint8_t a_ucA = 255) + { + uint8_t ucR8 = (uint8_t)((a_ucR5 << 3) + (a_ucR5 >> 2)); + uint8_t ucG8 = (uint8_t)((a_ucG5 << 3) + (a_ucG5 >> 2)); + uint8_t ucB8 = (uint8_t)((a_ucB5 << 3) + (a_ucB5 >> 2)); + + return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); + } 
+ + inline static ColorFloatRGBA ConvertFromR6G7B6(uint8_t a_ucR6, + uint8_t a_ucG7, + uint8_t a_ucB6, uint8_t a_ucA = 255) + { + uint8_t ucR8 = (uint8_t)((a_ucR6 << 2) + (a_ucR6 >> 4)); + uint8_t ucG8 = (uint8_t)((a_ucG7 << 1) + (a_ucG7 >> 6)); + uint8_t ucB8 = (uint8_t)((a_ucB6 << 2) + (a_ucB6 >> 4)); + + return ConvertFromRGBA8(ucR8, ucG8, ucB8, a_ucA); + } + + // quantize to 4 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR4G4B4(void) const + { + ColorFloatRGBA frgba = ClampRGB(); + + // quantize to 4 bits + frgba = frgba.ScaleRGB(15.0f).RoundRGB(); + uint32_t uiR4 = (uint32_t)frgba.fR; + uint32_t uiG4 = (uint32_t)frgba.fG; + uint32_t uiB4 = (uint32_t)frgba.fB; + + frgba = ConvertFromRGB4(uiR4, uiG4, uiB4); + frgba.fA = fA; + + return frgba; + } + + // quantize to 5 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR5G5B5(void) const + { + ColorFloatRGBA frgba = ClampRGBA(); + + // quantize to 5 bits + frgba = frgba.ScaleRGB(31.0f).RoundRGB(); + uint32_t uiR5 = (uint32_t)frgba.fR; + uint32_t uiG5 = (uint32_t)frgba.fG; + uint32_t uiB5 = (uint32_t)frgba.fB; + + frgba = ConvertFromRGB5(uiR5, uiG5, uiB5); + frgba.fA = fA; + return frgba; + } + + // quantize to 6/7/6 bits, expand to 8 bits + inline ColorFloatRGBA QuantizeR6G7B6(void) const + { + ColorFloatRGBA frgba = ClampRGBA(); + + // quantize to 6/7/6 bits + uint32_t uiR6 = (uint32_t)frgba.IntRed(63.0f); + uint32_t uiG7 = (uint32_t)frgba.IntGreen(127.0f); + uint32_t uiB6 = (uint32_t)frgba.IntBlue(63.0f); + + frgba = ConvertFromR6G7B6(uiR6, uiG7, uiB6); + frgba.fA = fA; + + return frgba; + } + + inline ColorFloatRGBA ClampRGB(void) const + { + return ClampRGBA(); + } + + inline ColorFloatRGBA ClampRGBA(void) const + { + ColorFloatRGBA frgba = *this; + if (frgba.fR < 0.0f) { frgba.fR = 0.0f; } + if (frgba.fR > 1.0f) { frgba.fR = 1.0f; } + if (frgba.fG < 0.0f) { frgba.fG = 0.0f; } + if (frgba.fG > 1.0f) { frgba.fG = 1.0f; } + if (frgba.fB < 0.0f) { frgba.fB = 0.0f; } + if (frgba.fB > 1.0f) { frgba.fB 
= 1.0f; } + if (frgba.fA < 0.0f) { frgba.fA = 0.0f; } + if (frgba.fA > 1.0f) { frgba.fA = 1.0f; } + + return frgba; + } + + inline int IntRed(float a_fScale) const + { + return (int)roundf(fR * a_fScale); + } + + inline int IntGreen(float a_fScale) const + { + return (int)roundf(fG * a_fScale); + } + + inline int IntBlue(float a_fScale) const + { + return (int)roundf(fB * a_fScale); + } + + inline int IntAlpha(float a_fScale) const + { + return (int)roundf(fA * a_fScale); + } + + float fR, fG, fB, fA; + }; + +} + diff --git a/libkram/etc2comp/EtcConfig.h b/libkram/etc2comp/EtcConfig.h index f706da8a..7c9ddac7 100644 --- a/libkram/etc2comp/EtcConfig.h +++ b/libkram/etc2comp/EtcConfig.h @@ -1,19 +1,19 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include diff --git a/libkram/etc2comp/EtcDifferentialTrys.cpp b/libkram/etc2comp/EtcDifferentialTrys.cpp index aa1945b0..b6ffc429 100644 --- a/libkram/etc2comp/EtcDifferentialTrys.cpp +++ b/libkram/etc2comp/EtcDifferentialTrys.cpp @@ -1,175 +1,175 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcDifferentialTrys.cpp - -Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode - -*/ - -#include "EtcConfig.h" -#include "EtcDifferentialTrys.h" - -#include - -namespace Etc -{ - - // ---------------------------------------------------------------------------------------------------- - // construct a list of trys (encoding attempts) - // - // a_frgbaColor1 is the basecolor for the first half - // a_frgbaColor2 is the basecolor for the second half - // a_pauiPixelMapping1 is the pixel order for the first half - // a_pauiPixelMapping2 is the pixel order for the second half - // a_uiRadius is the amount to vary the base colors - // - DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, - const unsigned int *a_pauiPixelMapping1, - const unsigned int *a_pauiPixelMapping2, - unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2) - { - assert(a_uiRadius <= MAX_RADIUS); - - m_boolSeverelyBentColors = false; - - ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5(); - 
ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5(); - - // quantize base colors - // ensure that trys with a_uiRadius don't overflow - int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius); - int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius); - int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius); - - int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius); - int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius); - int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius); - - int iDeltaRed = iRed2 - iRed1; - int iDeltaGreen = iGreen2 - iGreen1; - int iDeltaBlue = iBlue2 - iBlue1; - - // make sure components are within range - { - if (iDeltaRed > 3) - { - if (iDeltaRed > 7) - { - m_boolSeverelyBentColors = true; - } - - iRed1 += (iDeltaRed - 3) / 2; - iRed2 = iRed1 + 3; - iDeltaRed = 3; - } - else if (iDeltaRed < -4) - { - if (iDeltaRed < -8) - { - m_boolSeverelyBentColors = true; - } - - iRed1 += (iDeltaRed + 4) / 2; - iRed2 = iRed1 - 4; - iDeltaRed = -4; - } - assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius)); - assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius)); - assert(iDeltaRed >= -4 && iDeltaRed <= 3); - - if (iDeltaGreen > 3) - { - if (iDeltaGreen > 7) - { - m_boolSeverelyBentColors = true; - } - - iGreen1 += (iDeltaGreen - 3) / 2; - iGreen2 = iGreen1 + 3; - iDeltaGreen = 3; - } - else if (iDeltaGreen < -4) - { - if (iDeltaGreen < -8) - { - m_boolSeverelyBentColors = true; - } - - iGreen1 += (iDeltaGreen + 4) / 2; - iGreen2 = iGreen1 - 4; - iDeltaGreen = -4; - } - assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius)); - assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius)); - 
assert(iDeltaGreen >= -4 && iDeltaGreen <= 3); - - if (iDeltaBlue > 3) - { - if (iDeltaBlue > 7) - { - m_boolSeverelyBentColors = true; - } - - iBlue1 += (iDeltaBlue - 3) / 2; - iBlue2 = iBlue1 + 3; - iDeltaBlue = 3; - } - else if (iDeltaBlue < -4) - { - if (iDeltaBlue < -8) - { - m_boolSeverelyBentColors = true; - } - - iBlue1 += (iDeltaBlue + 4) / 2; - iBlue2 = iBlue1 - 4; - iDeltaBlue = -4; - } - assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius)); - assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius)); - assert(iDeltaBlue >= -4 && iDeltaBlue <= 3); - } - - m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); - m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); - - } - - // ---------------------------------------------------------------------------------------------------- - // - void DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, - const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) - { - - m_iRed = a_iRed; - m_iGreen = a_iGreen; - m_iBlue = a_iBlue; - - m_pauiPixelMapping = a_pauiPixelMapping; - m_uiRadius = a_uiRadius; - - m_uiTrys = 0; - m_ptryBest = nullptr; - - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcDifferentialTrys.cpp + +Gathers the results of the various encoding trys for both halves of a 4x4 block for Differential mode + +*/ + +#include "EtcConfig.h" +#include "EtcDifferentialTrys.h" + +#include + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // construct a list of trys (encoding attempts) + // + // a_frgbaColor1 is the basecolor for the first half + // a_frgbaColor2 is the basecolor for the second half + // a_pauiPixelMapping1 is the pixel order for the first half + // a_pauiPixelMapping2 is the pixel order for the second half + // a_uiRadius is the amount to vary the base colors + // + DifferentialTrys::DifferentialTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2) + { + assert(a_uiRadius <= MAX_RADIUS); + + m_boolSeverelyBentColors = false; + + ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR5G5B5(); + ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR5G5B5(); + + // quantize base colors + // ensure that trys with a_uiRadius don't overflow + int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(31.0f)+a_iGrayOffset1, a_uiRadius); + int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(31.0f) + a_iGrayOffset1, a_uiRadius); + int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(31.0f) + a_iGrayOffset1, a_uiRadius); + + int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(31.0f) + a_iGrayOffset2, a_uiRadius); + int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(31.0f) + a_iGrayOffset2, a_uiRadius); + int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(31.0f) + a_iGrayOffset2, a_uiRadius); + + int iDeltaRed = iRed2 - iRed1; + int iDeltaGreen = iGreen2 - iGreen1; + int iDeltaBlue = iBlue2 - iBlue1; + + // make sure components are within 
range + { + if (iDeltaRed > 3) + { + if (iDeltaRed > 7) + { + m_boolSeverelyBentColors = true; + } + + iRed1 += (iDeltaRed - 3) / 2; + iRed2 = iRed1 + 3; + iDeltaRed = 3; + } + else if (iDeltaRed < -4) + { + if (iDeltaRed < -8) + { + m_boolSeverelyBentColors = true; + } + + iRed1 += (iDeltaRed + 4) / 2; + iRed2 = iRed1 - 4; + iDeltaRed = -4; + } + assert(iRed1 >= (signed)(0 + a_uiRadius) && iRed1 <= (signed)(31 - a_uiRadius)); + assert(iRed2 >= (signed)(0 + a_uiRadius) && iRed2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaRed >= -4 && iDeltaRed <= 3); + + if (iDeltaGreen > 3) + { + if (iDeltaGreen > 7) + { + m_boolSeverelyBentColors = true; + } + + iGreen1 += (iDeltaGreen - 3) / 2; + iGreen2 = iGreen1 + 3; + iDeltaGreen = 3; + } + else if (iDeltaGreen < -4) + { + if (iDeltaGreen < -8) + { + m_boolSeverelyBentColors = true; + } + + iGreen1 += (iDeltaGreen + 4) / 2; + iGreen2 = iGreen1 - 4; + iDeltaGreen = -4; + } + assert(iGreen1 >= (signed)(0 + a_uiRadius) && iGreen1 <= (signed)(31 - a_uiRadius)); + assert(iGreen2 >= (signed)(0 + a_uiRadius) && iGreen2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaGreen >= -4 && iDeltaGreen <= 3); + + if (iDeltaBlue > 3) + { + if (iDeltaBlue > 7) + { + m_boolSeverelyBentColors = true; + } + + iBlue1 += (iDeltaBlue - 3) / 2; + iBlue2 = iBlue1 + 3; + iDeltaBlue = 3; + } + else if (iDeltaBlue < -4) + { + if (iDeltaBlue < -8) + { + m_boolSeverelyBentColors = true; + } + + iBlue1 += (iDeltaBlue + 4) / 2; + iBlue2 = iBlue1 - 4; + iDeltaBlue = -4; + } + assert(iBlue1 >= (signed)(0+a_uiRadius) && iBlue1 <= (signed)(31 - a_uiRadius)); + assert(iBlue2 >= (signed)(0 + a_uiRadius) && iBlue2 <= (signed)(31 - a_uiRadius)); + assert(iDeltaBlue >= -4 && iDeltaBlue <= 3); + } + + m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); + m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); + + } + + // ---------------------------------------------------------------------------------------------------- + // + void 
DifferentialTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) + { + + m_iRed = a_iRed; + m_iGreen = a_iGreen; + m_iBlue = a_iBlue; + + m_pauiPixelMapping = a_pauiPixelMapping; + m_uiRadius = a_uiRadius; + + m_uiTrys = 0; + m_ptryBest = nullptr; + + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcDifferentialTrys.h b/libkram/etc2comp/EtcDifferentialTrys.h index 6b1cd9c9..71860908 100644 --- a/libkram/etc2comp/EtcDifferentialTrys.h +++ b/libkram/etc2comp/EtcDifferentialTrys.h @@ -1,97 +1,97 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcColorFloatRGBA.h" - -namespace Etc -{ - - class DifferentialTrys - { - public: - - static const unsigned int MAX_RADIUS = 2; - - DifferentialTrys(ColorFloatRGBA a_frgbaColor1, - ColorFloatRGBA a_frgbaColor2, - const unsigned int *a_pauiPixelMapping1, - const unsigned int *a_pauiPixelMapping2, - unsigned int a_uiRadius, - int a_iGrayOffset1, int a_iGrayOffset2); - - inline static int MoveAwayFromEdge(int a_i, int a_iDistance) - { - if (a_i < (0+ a_iDistance)) - { - return (0 + a_iDistance); - } - else if (a_i > (31- a_iDistance)) - { - return (31 - a_iDistance); - } - - return a_i; - } - - class Try - { - public : - static const unsigned int SELECTORS = 8; // per half - - int m_iRed; - int m_iGreen; - int m_iBlue; - unsigned int m_uiCW; - unsigned int m_auiSelectors[SELECTORS]; - float m_fError; - }; - - class Half - { - public: - - static const unsigned int MAX_TRYS = 125; - - void Init(int a_iRed, int a_iGreen, int a_iBlue, - const unsigned int *a_pauiPixelMapping, - unsigned int a_uiRadius); - - // center of trys - int m_iRed; - int m_iGreen; - int m_iBlue; - - const unsigned int *m_pauiPixelMapping; - unsigned int m_uiRadius; - - unsigned int m_uiTrys; - Try m_atry[MAX_TRYS]; - - Try *m_ptryBest; - }; - - Half m_half1; - Half m_half2; - - bool m_boolSeverelyBentColors; - }; - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +namespace Etc +{ + + class DifferentialTrys + { + public: + + static const unsigned int MAX_RADIUS = 2; + + DifferentialTrys(ColorFloatRGBA a_frgbaColor1, + ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius, + int a_iGrayOffset1, int a_iGrayOffset2); + + inline static int MoveAwayFromEdge(int a_i, int a_iDistance) + { + if (a_i < (0+ a_iDistance)) + { + return (0 + a_iDistance); + } + else if (a_i > (31- a_iDistance)) + { + return (31 - a_iDistance); + } + + return a_i; + } + + class Try + { + public : + static const unsigned int SELECTORS = 8; // per half + + int m_iRed; + int m_iGreen; + int m_iBlue; + unsigned int m_uiCW; + unsigned int m_auiSelectors[SELECTORS]; + float m_fError; + }; + + class Half + { + public: + + static const unsigned int MAX_TRYS = 125; + + void Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, + unsigned int a_uiRadius); + + // center of trys + int m_iRed; + int m_iGreen; + int m_iBlue; + + const unsigned int *m_pauiPixelMapping; + unsigned int m_uiRadius; + + unsigned int m_uiTrys; + Try m_atry[MAX_TRYS]; + + Try *m_ptryBest; + }; + + Half m_half1; + Half m_half2; + + bool m_boolSeverelyBentColors; + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcErrorMetric.h b/libkram/etc2comp/EtcErrorMetric.h index 993fab88..54a2f10e 100644 --- a/libkram/etc2comp/EtcErrorMetric.h +++ b/libkram/etc2comp/EtcErrorMetric.h @@ -1,66 +1,66 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -namespace Etc -{ - - enum ErrorMetric - { - //RGBA, // Premul weighted RGB - //RGBX, - - GRAY, - REC709, // Luma weighted(RGB) + A*A - - NUMERIC, // X*X + Y*Y + Z*Z + W*W -// NUMERICX, // X*X -// NUMERICXY, // X*X + Y*Y -// -// NORMALXYZ, - // - //ERROR_METRICS, - // - //BT709 = REC709 - }; - - inline const char *ErrorMetricToString(ErrorMetric errorMetric) - { - switch (errorMetric) - { -// case RGBA: -// return "RGBA"; -// case RGBX: -// return "RGBX"; - case GRAY: - return "GRAY"; - case REC709: - return "REC709"; - case NUMERIC: - return "NUMERIC"; -// case NUMERICX: -// return "NUMERICX"; -// case NUMERICXY: -// return "NUMERICXY"; -// case NORMALXYZ: -// return "NORMALXYZ"; - //case ERROR_METRICS: - default: - return "UNKNOWN"; - } - } -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +namespace Etc +{ + + enum ErrorMetric + { + //RGBA, // Premul weighted RGB + //RGBX, + + GRAY, + REC709, // Luma weighted(RGB) + A*A + + NUMERIC, // X*X + Y*Y + Z*Z + W*W +// NUMERICX, // X*X +// NUMERICXY, // X*X + Y*Y +// +// NORMALXYZ, + // + //ERROR_METRICS, + // + //BT709 = REC709 + }; + + inline const char *ErrorMetricToString(ErrorMetric errorMetric) + { + switch (errorMetric) + { +// case RGBA: +// return "RGBA"; +// case RGBX: +// return "RGBX"; + case GRAY: + return "GRAY"; + case REC709: + return "REC709"; + case NUMERIC: + return "NUMERIC"; +// case NUMERICX: +// return "NUMERICX"; +// case NUMERICXY: +// return "NUMERICXY"; +// case NORMALXYZ: +// return "NORMALXYZ"; + //case ERROR_METRICS: + default: + return "UNKNOWN"; + } + } +} // namespace Etc diff --git a/libkram/etc2comp/EtcImage.cpp b/libkram/etc2comp/EtcImage.cpp index 77f5a071..16eeeece 100644 --- a/libkram/etc2comp/EtcImage.cpp +++ b/libkram/etc2comp/EtcImage.cpp @@ -1,700 +1,700 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcImage.cpp - -Image is an array of 4x4 blocks that represent the encoding of the source image - -*/ - - -#include "EtcConfig.h" - -// is this needed? 
-//#if ETC_WINDOWS -//#include -//#endif - - -#include "EtcImage.h" - -#include "EtcBlock4x4.h" -#include "EtcBlock4x4EncodingBits.h" - -#include "EtcBlock4x4Encoding_R11.h" -#include "EtcBlock4x4Encoding_RG11.h" - -#include -//#include -#include -#include -//#include -#include -#include -#include -//#include - -#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f) -#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) -#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f) - -// C++14 implement reverse iteration adaptor -// https://stackoverflow.com/questions/8542591/c11-reverse-range-based-for-loop -//template -//struct reverseIterator { T& iterable; }; -// -//template -//auto begin (reverseIterator w) { return std::rbegin(w.iterable); } -// -//template -//auto end (reverseIterator w) { return std::rend(w.iterable); } -// -//template -//reverseIterator reverse (T&& iterable) { return { iterable }; } - -namespace Etc -{ - // ---------------------------------------------------------------------------------------------------- - // constructor using source image - Image::Image(Format a_format, const ColorR8G8B8A8 *a_pafSourceRGBA, unsigned int a_uiSourceWidth, - unsigned int a_uiSourceHeight, - ErrorMetric a_errormetric) - { - m_encodingStatus = EncodingStatus::SUCCESS; - m_uiSourceWidth = a_uiSourceWidth; - m_uiSourceHeight = a_uiSourceHeight; - - int uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth); - int uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight); - - m_uiBlockColumns = uiExtendedWidth >> 2; - m_uiBlockRows = uiExtendedHeight >> 2; - - m_format = a_format; - - m_encodingbitsformat = DetermineEncodingBitsFormat(m_format); - int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); - m_uiEncodingBitsBytes = GetNumberOfBlocks() * blockSize; - - m_paucEncodingBits = nullptr; - - m_errormetric = a_errormetric; - m_fEffort = 0.0f; - - m_iEncodeTime_ms = 0; - - m_bVerboseOutput = false; - - // this can be nullptr - m_pafrgbaSource 
= a_pafSourceRGBA; - } - - // ---------------------------------------------------------------------------------------------------- - // - Image::~Image(void) - { - } - - Image::EncodingStatus Image::EncodeSinglepass(float a_fEffort, uint8_t* outputTexture) - { - m_encodingStatus = EncodingStatus::SUCCESS; - m_fEffort = a_fEffort; - - // alias the output etxture - m_paucEncodingBits = outputTexture; - - //-------------------------- - // walk the src image as 4x4 blocks, and complete each block and output it - int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); - uint8_t *outputBlock = outputTexture; - int totalIterations = 0; - - switch(m_format) { - case Image::Format::R11: - case Image::Format::SIGNED_R11: - case Image::Format::RG11: - case Image::Format::SIGNED_RG11: - { - bool isSnorm = - m_format == Image::Format::SIGNED_R11 || - m_format == Image::Format::SIGNED_RG11; - bool isR = - m_format == Image::Format::R11 || - m_format == Image::Format::SIGNED_R11; - - IBlockEncoding* encoder; - if (isR) - encoder = new Block4x4Encoding_R11; - else - encoder = new Block4x4Encoding_RG11; - - ColorFloatRGBA sourcePixels[16]; - - for (int y = 0; y < (int)m_uiBlockRows; y++) - { - int srcY = y * 4; - - for (int x = 0; x < (int)m_uiBlockColumns; x++) - { - int srcX = x * 4; - - // now pull all pixels for the block, this clamps to edge - // NOTE: must convert from image horizontal scan to block vertical scan - - int uiPixel = 0; - - for (int xx = 0; xx < 4; xx++) - { - int srcXX = srcX + xx; - - for (int yy = 0; yy < 4; yy++) - { - int srcYY = srcY + yy; - - ColorFloatRGBA sourcePixel = this->GetSourcePixel(srcXX, srcYY); - sourcePixels[uiPixel++] = sourcePixel; - } - } - - // encode that block in as many iterations as it takes to finish - encoder->Encode(&sourcePixels[0].fR, outputBlock, isSnorm); - - // TODO: consider iterating red until done, then green for cache reasons - while (!encoder->IsDone()) - { - // iterate on lowest error until block is 
done, or quality iterations reached - encoder->PerformIteration(m_fEffort); - totalIterations++; - - // only do the first iteration - if (m_fEffort == 0.0) { - break; - } - } - - // store to etc block - encoder->SetEncodingBits(); - - outputBlock += blockSize; - } - } - - // this encoder isn't created/held by a block, so must be deleted - delete encoder; - - break; - } - default: - { - // Handle all the rgb/rgba formats which are much more involved - - Block4x4 block; - Block4x4Encoding* encoder = nullptr; - - for (int y = 0; y < (int)m_uiBlockRows; y++) - { - int srcY = y * 4; - - for (int x = 0; x < (int)m_uiBlockColumns; x++) - { - int srcX = x * 4; - - // this block copies out a 4x4 tile from the source image - block.Encode(this, srcX, srcY, outputBlock); - - // this encoder is allodated in first encode, then used for all blocks - if (!encoder) - { - encoder = block.GetEncoding(); - } - - while (!encoder->IsDone()) - { - // repeat until block is done, then store data - encoder->PerformIteration(m_fEffort); - totalIterations++; - - // only do the first iteration - if (m_fEffort == 0.0) { - break; - } - } - - // convert to etc block bits - encoder->SetEncodingBits(); - - outputBlock += blockSize; - } - } - break; - } - } - if (m_bVerboseOutput) - { - KLOGI("EtcComp", "Total iterations %d\n", totalIterations); - } - - // block deletes the encoding, so don't delete here - - return m_encodingStatus; - } - - // ---------------------------------------------------------------------------------------------------- - Image::EncodingStatus Image::Encode(float blockPercent, - float a_fEffort, - uint8_t* outputTexture) - { - - auto start = std::chrono::steady_clock::now(); - int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); - - m_fEffort = a_fEffort; - - // alias the output etxture - m_paucEncodingBits = outputTexture; - - using namespace NAMESPACE_STL; - - struct SortedBlock - { - //uint8_t lastIteration = 0; - uint16_t srcX = 0, srcY = 0; - 
uint16_t iterationData = 0; - - float error = FLT_MAX; - - // this must match sort operator below - bool operator>(const SortedBlock& rhs) const - { - return error > rhs.error; - } - }; - - int totalIterations = 0; - int numberOfBlocks = GetNumberOfBlocks(); - - vector sortedBlocks; - sortedBlocks.resize(numberOfBlocks); - - // now fill out the sorted blocks - for (int y = 0; y < (int)m_uiBlockRows; ++y) - { - int yy = y * m_uiBlockColumns; - - for (int x = 0; x < (int)m_uiBlockColumns; ++x) - { - sortedBlocks[yy + x].srcX = x; - sortedBlocks[yy + x].srcY = y; - } - } - - // NOTE: This is the questionable aspect of this encoder. - // It stops once say 49% of the blocks have finished. This means the other 51% may have huge errors - // compared to the source pixels. colorMap.d finishes in 1 pass since it's mostly gradients, but - // other textures need many more passes if content varies. - // - // One pass is done on all blocks to encode them all, then only - // the remaining blocks below this count are processed with the top errors in the sorted array. - // This number is also computed per mip level, but a change was made spend more time in mip blocks - // and less to large mips. But effort also affects how many iterations are performed and that affects quality. 
- - int numBlocksToFinish; - int minBlocks = 0; // 64*64; - - if (numberOfBlocks >= minBlocks) - { - numBlocksToFinish = static_cast(roundf(0.01f * blockPercent * numberOfBlocks)); - - if (m_bVerboseOutput) - { - KLOGI("EtcComp", "Will only finish %d/%d blocks", numBlocksToFinish, numberOfBlocks); - } - } - else - { - // do all blocks below a certain count, so mips are fully procesed regardless of effor setting - numBlocksToFinish = numberOfBlocks; - } - - // iterate on all blocks at least once and possible more iterations - - // setup for rgb/a - Block4x4 block; - Block4x4Encoding* encoder = nullptr; - - // setup for r/rg11 - bool isSnorm = - m_format == Image::Format::SIGNED_R11 || - m_format == Image::Format::SIGNED_RG11; - bool isR = - m_format == Image::Format::R11 || - m_format == Image::Format::SIGNED_R11; - bool isRG = - m_format == Image::Format::RG11 || - m_format == Image::Format::SIGNED_RG11; - - IBlockEncoding* encoderRG = nullptr; - if (isR) - encoderRG = new Block4x4Encoding_R11; - else if (isRG) - encoderRG = new Block4x4Encoding_RG11; - - ColorFloatRGBA sourcePixels[16]; - - int pass = 0; - - while(true) - { - // At the end of encode, blocks are encoded back to the outputTexture - // that way no additional storage is needed, and only one block per thread - // is required. This doesn't do threading, since a process works on one texture. 
- for (auto& it : sortedBlocks) - { - int srcX = it.srcX; - int srcY = it.srcY; - - uint8_t* outputBlock = outputTexture + (srcY * m_uiBlockColumns + srcX) * blockSize; - - if (!encoderRG) { - // this block copies out a 4x4 tile from the source image - if (pass == 0) - { - block.Encode(this, srcX * 4, srcY * 4, outputBlock); - - // encoder is allocated on first encode, then reused for the rest - // to multithread, would need one block/encoder per therad - if (!encoder) - { - encoder = block.GetEncoding(); - } - } - else - { - block.Decode(srcX * 4, srcY * 4, outputBlock, this, pass); - } - - // this is one pass - encoder->PerformIteration(m_fEffort); - totalIterations++; - - // convert to etc block bits - encoder->SetEncodingBits(); - - it.iterationData = pass; - it.error = encoder->IsDone() ? 0.0f : encoder->GetError(); - } - else { - // different interface for r/rg11, but same logic as above - int uiPixel = 0; - - // this copy is a transpose of the block before encoding - for (int xx = 0; xx < 4; xx++) - { - int srcXX = 4 * srcX + xx; - - for (int yy = 0; yy < 4; yy++) - { - int srcYY = 4 * srcY + yy; - - ColorFloatRGBA sourcePixel = this->GetSourcePixel(srcXX, srcYY); - sourcePixels[uiPixel++] = sourcePixel; - } - } - - // encode that block in as many iterations as it takes to finish - if (pass == 0) - { - encoderRG->Encode(&sourcePixels[0].fR, outputBlock, isSnorm); - } - else - { - encoderRG->Decode(outputBlock, &sourcePixels[0].fR, isSnorm, it.iterationData); - } - - encoderRG->PerformIteration(m_fEffort); - totalIterations++; - - // store to etc block - encoderRG->SetEncodingBits(); - - it.iterationData = encoderRG->GetIterationCount(); - it.error = encoderRG->IsDone() ? 
0.0f : encoderRG->GetError(); - } - - if (it.error == 0.0f) - { - numBlocksToFinish--; - - // stop once block count reached, but can only stop once all blocks encoded at least once - if (pass > 0 && numBlocksToFinish <= 0) - { - break; - } - } - } - - // stop if min effort level, only process blocks once - if (m_fEffort <= ETCCOMP_MIN_EFFORT_LEVEL) - { - break; - } - // stop if any pass finished all the blocks - if (numBlocksToFinish <= 0) - { - break; - } - - // sorts largest errors to front - NAMESPACE_STL::sort(sortedBlocks.begin(), sortedBlocks.end(), std::greater()); - - // lop off the end of the array where blocks are 0 error or don - int counter = 0; - for (int i = (int)sortedBlocks.size()-1; i >= 0; --i) - { - if (sortedBlocks[i].error == 0.0f) - { - counter++; - } - else - { - break; - } - } - - sortedBlocks.resize(sortedBlocks.size() - counter); - pass++; - } - - delete encoderRG; - - if (m_bVerboseOutput) - { - KLOGI("EtcComp", "Total iterations %d in %d passes\n", totalIterations, pass + 1); - } - - auto end = std::chrono::steady_clock::now(); - std::chrono::milliseconds elapsed = std::chrono::duration_cast(end - start); - m_iEncodeTime_ms = (int)elapsed.count(); - - return m_encodingStatus; - } - - Image::EncodingStatus Image::Decode(const uint8_t* etcBlocks, uint8_t* outputTexture) - { - // setup for rgb/a - Block4x4 block; - Block4x4Encoding* encoder = nullptr; - - // setup for r/rg11 - bool isSnorm = - m_format == Image::Format::SIGNED_R11 || - m_format == Image::Format::SIGNED_RG11; - bool isR = - m_format == Image::Format::R11 || - m_format == Image::Format::SIGNED_R11; - bool isRG = - m_format == Image::Format::RG11 || - m_format == Image::Format::SIGNED_RG11; - - IBlockEncoding* encoderRG = nullptr; - if (isR) - encoderRG = new Block4x4Encoding_R11; - else if (isRG) - encoderRG = new Block4x4Encoding_RG11; - - // initialized to 0 by ctor - ColorFloatRGBA dstPixels[16]; - - // r and rg wiil return yzw = 001 and zw = 01, rgb will return a = 1 - 
for (int i = 0; i < 16; ++i) - { - dstPixels[i].fA = 1.0f; - } - - int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); - - for (int yy = 0; yy < (int)m_uiBlockRows; ++yy) - { - for (int xx = 0; xx < (int)m_uiBlockColumns; ++xx) - { - int srcX = xx; - int srcY = yy; - - const uint8_t* srcBlock = etcBlocks + (srcY * m_uiBlockColumns + srcX) * blockSize; - - if (!encoderRG) - { - // this almost works except alpha on RGBA8 isn't set - - block.Decode(srcX * 4, srcY * 4, (unsigned char*)srcBlock, this, 0); - - if (!encoder) - { - encoder = block.GetEncoding(); - } - if (m_format == Image::Format::RGBA8 || - m_format == Image::Format::SRGBA8) - { - encoder->DecodeAlpha(); - } - - // now extract rgb and a from the encoding - for (int i = 0; i < 16; ++i) - { - dstPixels[i] = encoder->GetDecodedPixel(i); - } - } - else - { - // this fills out r or rg with float values that are unorm 0 to 1 (even for snorm) - encoderRG->DecodeOnly(srcBlock, &dstPixels[0].fR, isSnorm); - } - - // now convert float pixels back to unorm8, don't copy pixels in block outside of w/h bound - // I don't know if dstPixels array is transposed when decoded or not? 
- - - ColorR8G8B8A8* dstPixels8 = (ColorR8G8B8A8*)outputTexture; - for (int y = 0; y < 4; y++) - { - int yd = y + srcY * 4; - if (yd >= (int)m_uiSourceHeight) - { - break; - } - - for (int x = 0; x < 4; x++) - { - int xd = x + srcX * 4; - if (xd >= (int)m_uiSourceWidth) - { - continue; - } - - const ColorFloatRGBA& color = dstPixels[x * 4 + y]; // Note: pixel lookup transpose here - - ColorR8G8B8A8& dst = dstPixels8[yd * m_uiSourceWidth + xd]; - dst.ucR = (uint8_t)color.IntRed(255.0f); - dst.ucG = (uint8_t)color.IntGreen(255.0f); - dst.ucB = (uint8_t)color.IntBlue(255.0f); - dst.ucA = (uint8_t)color.IntAlpha(255.0f); - } - } - } - } - - delete encoderRG; - - return m_encodingStatus; - } - - // ---------------------------------------------------------------------------------------------------- - // return a string name for a given image format - // - const char * Image::EncodingFormatToString(Image::Format a_format) - { - switch (a_format) - { - case Image::Format::ETC1: - return "ETC1"; - case Image::Format::RGB8: - return "RGB8"; - case Image::Format::SRGB8: - return "SRGB8"; - - case Image::Format::RGB8A1: - return "RGB8A1"; - case Image::Format::SRGB8A1: - return "SRGB8A1"; - case Image::Format::RGBA8: - return "RGBA8"; - case Image::Format::SRGBA8: - return "SRGBA8"; - - case Image::Format::R11: - return "R11"; - case Image::Format::SIGNED_R11: - return "SIGNED_R11"; - - case Image::Format::RG11: - return "RG11"; - case Image::Format::SIGNED_RG11: - return "SIGNED_RG11"; - case Image::Format::FORMATS: - case Image::Format::UNKNOWN: - default: - return "UNKNOWN"; - } - } - - // ---------------------------------------------------------------------------------------------------- - // return a string name for the image's format - // - const char * Image::EncodingFormatToString(void) const - { - return EncodingFormatToString(m_format); - } - - // ---------------------------------------------------------------------------------------------------- - // determine the 
encoding bits format based on the encoding format - // the encoding bits format is a family of bit encodings that are shared across various encoding formats - // - Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format) - { - Block4x4EncodingBits::Format encodingbitsformat; - - // determine encoding bits format from image format - switch (a_format) - { - case Format::ETC1: - case Format::RGB8: - case Format::SRGB8: - encodingbitsformat = Block4x4EncodingBits::Format::RGB8; - break; - - case Format::RGBA8: - case Format::SRGBA8: - encodingbitsformat = Block4x4EncodingBits::Format::RGBA8; - break; - - case Format::R11: - case Format::SIGNED_R11: - encodingbitsformat = Block4x4EncodingBits::Format::R11; - break; - - case Format::RG11: - case Format::SIGNED_RG11: - encodingbitsformat = Block4x4EncodingBits::Format::RG11; - break; - - case Format::RGB8A1: - case Format::SRGB8A1: - encodingbitsformat = Block4x4EncodingBits::Format::RGB8A1; - break; - - default: - encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN; - break; - } - - return encodingbitsformat; - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcImage.cpp + +Image is an array of 4x4 blocks that represent the encoding of the source image + +*/ + + +#include "EtcConfig.h" + +// is this needed? +//#if ETC_WINDOWS +//#include +//#endif + + +#include "EtcImage.h" + +#include "EtcBlock4x4.h" +#include "EtcBlock4x4EncodingBits.h" + +#include "EtcBlock4x4Encoding_R11.h" +#include "EtcBlock4x4Encoding_RG11.h" + +#include +//#include +#include +#include +//#include +#include +#include +#include +//#include + +#define ETCCOMP_MIN_EFFORT_LEVEL (0.0f) +#define ETCCOMP_DEFAULT_EFFORT_LEVEL (40.0f) +#define ETCCOMP_MAX_EFFORT_LEVEL (100.0f) + +// C++14 implement reverse iteration adaptor +// https://stackoverflow.com/questions/8542591/c11-reverse-range-based-for-loop +//template +//struct reverseIterator { T& iterable; }; +// +//template +//auto begin (reverseIterator w) { return std::rbegin(w.iterable); } +// +//template +//auto end (reverseIterator w) { return std::rend(w.iterable); } +// +//template +//reverseIterator reverse (T&& iterable) { return { iterable }; } + +namespace Etc +{ + // ---------------------------------------------------------------------------------------------------- + // constructor using source image + Image::Image(Format a_format, const ColorR8G8B8A8 *a_pafSourceRGBA, unsigned int a_uiSourceWidth, + unsigned int a_uiSourceHeight, + ErrorMetric a_errormetric) + { + m_encodingStatus = EncodingStatus::SUCCESS; + m_uiSourceWidth = a_uiSourceWidth; + m_uiSourceHeight = a_uiSourceHeight; + + int uiExtendedWidth = CalcExtendedDimension((unsigned short)m_uiSourceWidth); + int uiExtendedHeight = CalcExtendedDimension((unsigned short)m_uiSourceHeight); + + m_uiBlockColumns = uiExtendedWidth >> 2; + m_uiBlockRows = uiExtendedHeight >> 2; + + m_format = a_format; + + m_encodingbitsformat = DetermineEncodingBitsFormat(m_format); + int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + m_uiEncodingBitsBytes = GetNumberOfBlocks() * blockSize; + + m_paucEncodingBits = 
nullptr; + + m_errormetric = a_errormetric; + m_fEffort = 0.0f; + + m_iEncodeTime_ms = 0; + + m_bVerboseOutput = false; + + // this can be nullptr + m_pafrgbaSource = a_pafSourceRGBA; + } + + // ---------------------------------------------------------------------------------------------------- + // + Image::~Image(void) + { + } + + Image::EncodingStatus Image::EncodeSinglepass(float a_fEffort, uint8_t* outputTexture) + { + m_encodingStatus = EncodingStatus::SUCCESS; + m_fEffort = a_fEffort; + + // alias the output etxture + m_paucEncodingBits = outputTexture; + + //-------------------------- + // walk the src image as 4x4 blocks, and complete each block and output it + int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + uint8_t *outputBlock = outputTexture; + int totalIterations = 0; + + switch(m_format) { + case Image::Format::R11: + case Image::Format::SIGNED_R11: + case Image::Format::RG11: + case Image::Format::SIGNED_RG11: + { + bool isSnorm = + m_format == Image::Format::SIGNED_R11 || + m_format == Image::Format::SIGNED_RG11; + bool isR = + m_format == Image::Format::R11 || + m_format == Image::Format::SIGNED_R11; + + IBlockEncoding* encoder; + if (isR) + encoder = new Block4x4Encoding_R11; + else + encoder = new Block4x4Encoding_RG11; + + ColorFloatRGBA sourcePixels[16]; + + for (int y = 0; y < (int)m_uiBlockRows; y++) + { + int srcY = y * 4; + + for (int x = 0; x < (int)m_uiBlockColumns; x++) + { + int srcX = x * 4; + + // now pull all pixels for the block, this clamps to edge + // NOTE: must convert from image horizontal scan to block vertical scan + + int uiPixel = 0; + + for (int xx = 0; xx < 4; xx++) + { + int srcXX = srcX + xx; + + for (int yy = 0; yy < 4; yy++) + { + int srcYY = srcY + yy; + + ColorFloatRGBA sourcePixel = this->GetSourcePixel(srcXX, srcYY); + sourcePixels[uiPixel++] = sourcePixel; + } + } + + // encode that block in as many iterations as it takes to finish + encoder->Encode(&sourcePixels[0].fR, 
outputBlock, isSnorm); + + // TODO: consider iterating red until done, then green for cache reasons + while (!encoder->IsDone()) + { + // iterate on lowest error until block is done, or quality iterations reached + encoder->PerformIteration(m_fEffort); + totalIterations++; + + // only do the first iteration + if (m_fEffort == 0.0) { + break; + } + } + + // store to etc block + encoder->SetEncodingBits(); + + outputBlock += blockSize; + } + } + + // this encoder isn't created/held by a block, so must be deleted + delete encoder; + + break; + } + default: + { + // Handle all the rgb/rgba formats which are much more involved + + Block4x4 block; + Block4x4Encoding* encoder = nullptr; + + for (int y = 0; y < (int)m_uiBlockRows; y++) + { + int srcY = y * 4; + + for (int x = 0; x < (int)m_uiBlockColumns; x++) + { + int srcX = x * 4; + + // this block copies out a 4x4 tile from the source image + block.Encode(this, srcX, srcY, outputBlock); + + // this encoder is allodated in first encode, then used for all blocks + if (!encoder) + { + encoder = block.GetEncoding(); + } + + while (!encoder->IsDone()) + { + // repeat until block is done, then store data + encoder->PerformIteration(m_fEffort); + totalIterations++; + + // only do the first iteration + if (m_fEffort == 0.0) { + break; + } + } + + // convert to etc block bits + encoder->SetEncodingBits(); + + outputBlock += blockSize; + } + } + break; + } + } + if (m_bVerboseOutput) + { + KLOGI("EtcComp", "Total iterations %d\n", totalIterations); + } + + // block deletes the encoding, so don't delete here + + return m_encodingStatus; + } + + // ---------------------------------------------------------------------------------------------------- + Image::EncodingStatus Image::Encode(float blockPercent, + float a_fEffort, + uint8_t* outputTexture) + { + + auto start = std::chrono::steady_clock::now(); + int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + + m_fEffort = a_fEffort; + + // alias the output 
etxture + m_paucEncodingBits = outputTexture; + + using namespace STL_NAMESPACE; + + struct SortedBlock + { + //uint8_t lastIteration = 0; + uint16_t srcX = 0, srcY = 0; + uint16_t iterationData = 0; + + float error = FLT_MAX; + + // this must match sort operator below + bool operator>(const SortedBlock& rhs) const + { + return error > rhs.error; + } + }; + + int totalIterations = 0; + int numberOfBlocks = GetNumberOfBlocks(); + + vector sortedBlocks; + sortedBlocks.resize(numberOfBlocks); + + // now fill out the sorted blocks + for (int y = 0; y < (int)m_uiBlockRows; ++y) + { + int yy = y * m_uiBlockColumns; + + for (int x = 0; x < (int)m_uiBlockColumns; ++x) + { + sortedBlocks[yy + x].srcX = x; + sortedBlocks[yy + x].srcY = y; + } + } + + // NOTE: This is the questionable aspect of this encoder. + // It stops once say 49% of the blocks have finished. This means the other 51% may have huge errors + // compared to the source pixels. colorMap.d finishes in 1 pass since it's mostly gradients, but + // other textures need many more passes if content varies. + // + // One pass is done on all blocks to encode them all, then only + // the remaining blocks below this count are processed with the top errors in the sorted array. + // This number is also computed per mip level, but a change was made spend more time in mip blocks + // and less to large mips. But effort also affects how many iterations are performed and that affects quality. 
+ + int numBlocksToFinish; + int minBlocks = 0; // 64*64; + + if (numberOfBlocks >= minBlocks) + { + numBlocksToFinish = static_cast(roundf(0.01f * blockPercent * numberOfBlocks)); + + if (m_bVerboseOutput) + { + KLOGI("EtcComp", "Will only finish %d/%d blocks", numBlocksToFinish, numberOfBlocks); + } + } + else + { + // do all blocks below a certain count, so mips are fully procesed regardless of effor setting + numBlocksToFinish = numberOfBlocks; + } + + // iterate on all blocks at least once and possible more iterations + + // setup for rgb/a + Block4x4 block; + Block4x4Encoding* encoder = nullptr; + + // setup for r/rg11 + bool isSnorm = + m_format == Image::Format::SIGNED_R11 || + m_format == Image::Format::SIGNED_RG11; + bool isR = + m_format == Image::Format::R11 || + m_format == Image::Format::SIGNED_R11; + bool isRG = + m_format == Image::Format::RG11 || + m_format == Image::Format::SIGNED_RG11; + + IBlockEncoding* encoderRG = nullptr; + if (isR) + encoderRG = new Block4x4Encoding_R11; + else if (isRG) + encoderRG = new Block4x4Encoding_RG11; + + ColorFloatRGBA sourcePixels[16]; + + int pass = 0; + + while(true) + { + // At the end of encode, blocks are encoded back to the outputTexture + // that way no additional storage is needed, and only one block per thread + // is required. This doesn't do threading, since a process works on one texture. 
+ for (auto& it : sortedBlocks) + { + int srcX = it.srcX; + int srcY = it.srcY; + + uint8_t* outputBlock = outputTexture + (srcY * m_uiBlockColumns + srcX) * blockSize; + + if (!encoderRG) { + // this block copies out a 4x4 tile from the source image + if (pass == 0) + { + block.Encode(this, srcX * 4, srcY * 4, outputBlock); + + // encoder is allocated on first encode, then reused for the rest + // to multithread, would need one block/encoder per therad + if (!encoder) + { + encoder = block.GetEncoding(); + } + } + else + { + block.Decode(srcX * 4, srcY * 4, outputBlock, this, pass); + } + + // this is one pass + encoder->PerformIteration(m_fEffort); + totalIterations++; + + // convert to etc block bits + encoder->SetEncodingBits(); + + it.iterationData = pass; + it.error = encoder->IsDone() ? 0.0f : encoder->GetError(); + } + else { + // different interface for r/rg11, but same logic as above + int uiPixel = 0; + + // this copy is a transpose of the block before encoding + for (int xx = 0; xx < 4; xx++) + { + int srcXX = 4 * srcX + xx; + + for (int yy = 0; yy < 4; yy++) + { + int srcYY = 4 * srcY + yy; + + ColorFloatRGBA sourcePixel = this->GetSourcePixel(srcXX, srcYY); + sourcePixels[uiPixel++] = sourcePixel; + } + } + + // encode that block in as many iterations as it takes to finish + if (pass == 0) + { + encoderRG->Encode(&sourcePixels[0].fR, outputBlock, isSnorm); + } + else + { + encoderRG->Decode(outputBlock, &sourcePixels[0].fR, isSnorm, it.iterationData); + } + + encoderRG->PerformIteration(m_fEffort); + totalIterations++; + + // store to etc block + encoderRG->SetEncodingBits(); + + it.iterationData = encoderRG->GetIterationCount(); + it.error = encoderRG->IsDone() ? 
0.0f : encoderRG->GetError(); + } + + if (it.error == 0.0f) + { + numBlocksToFinish--; + + // stop once block count reached, but can only stop once all blocks encoded at least once + if (pass > 0 && numBlocksToFinish <= 0) + { + break; + } + } + } + + // stop if min effort level, only process blocks once + if (m_fEffort <= ETCCOMP_MIN_EFFORT_LEVEL) + { + break; + } + // stop if any pass finished all the blocks + if (numBlocksToFinish <= 0) + { + break; + } + + // sorts largest errors to front + std::sort(sortedBlocks.begin(), sortedBlocks.end(), std::greater()); + + // lop off the end of the array where blocks are 0 error or don + int counter = 0; + for (int i = (int)sortedBlocks.size()-1; i >= 0; --i) + { + if (sortedBlocks[i].error == 0.0f) + { + counter++; + } + else + { + break; + } + } + + sortedBlocks.resize(sortedBlocks.size() - counter); + pass++; + } + + delete encoderRG; + + if (m_bVerboseOutput) + { + KLOGI("EtcComp", "Total iterations %d in %d passes\n", totalIterations, pass + 1); + } + + auto end = std::chrono::steady_clock::now(); + std::chrono::milliseconds elapsed = std::chrono::duration_cast(end - start); + m_iEncodeTime_ms = (int)elapsed.count(); + + return m_encodingStatus; + } + + Image::EncodingStatus Image::Decode(const uint8_t* etcBlocks, uint8_t* outputTexture) + { + // setup for rgb/a + Block4x4 block; + Block4x4Encoding* encoder = nullptr; + + // setup for r/rg11 + bool isSnorm = + m_format == Image::Format::SIGNED_R11 || + m_format == Image::Format::SIGNED_RG11; + bool isR = + m_format == Image::Format::R11 || + m_format == Image::Format::SIGNED_R11; + bool isRG = + m_format == Image::Format::RG11 || + m_format == Image::Format::SIGNED_RG11; + + IBlockEncoding* encoderRG = nullptr; + if (isR) + encoderRG = new Block4x4Encoding_R11; + else if (isRG) + encoderRG = new Block4x4Encoding_RG11; + + // initialized to 0 by ctor + ColorFloatRGBA dstPixels[16]; + + // r and rg wiil return yzw = 001 and zw = 01, rgb will return a = 1 + for (int i = 
0; i < 16; ++i) + { + dstPixels[i].fA = 1.0f; + } + + int blockSize = Block4x4EncodingBits::GetBytesPerBlock(m_encodingbitsformat); + + for (int yy = 0; yy < (int)m_uiBlockRows; ++yy) + { + for (int xx = 0; xx < (int)m_uiBlockColumns; ++xx) + { + int srcX = xx; + int srcY = yy; + + const uint8_t* srcBlock = etcBlocks + (srcY * m_uiBlockColumns + srcX) * blockSize; + + if (!encoderRG) + { + // this almost works except alpha on RGBA8 isn't set + + block.Decode(srcX * 4, srcY * 4, (unsigned char*)srcBlock, this, 0); + + if (!encoder) + { + encoder = block.GetEncoding(); + } + if (m_format == Image::Format::RGBA8 || + m_format == Image::Format::SRGBA8) + { + encoder->DecodeAlpha(); + } + + // now extract rgb and a from the encoding + for (int i = 0; i < 16; ++i) + { + dstPixels[i] = encoder->GetDecodedPixel(i); + } + } + else + { + // this fills out r or rg with float values that are unorm 0 to 1 (even for snorm) + encoderRG->DecodeOnly(srcBlock, &dstPixels[0].fR, isSnorm); + } + + // now convert float pixels back to unorm8, don't copy pixels in block outside of w/h bound + // I don't know if dstPixels array is transposed when decoded or not? 
+ + + ColorR8G8B8A8* dstPixels8 = (ColorR8G8B8A8*)outputTexture; + for (int y = 0; y < 4; y++) + { + int yd = y + srcY * 4; + if (yd >= (int)m_uiSourceHeight) + { + break; + } + + for (int x = 0; x < 4; x++) + { + int xd = x + srcX * 4; + if (xd >= (int)m_uiSourceWidth) + { + continue; + } + + const ColorFloatRGBA& color = dstPixels[x * 4 + y]; // Note: pixel lookup transpose here + + ColorR8G8B8A8& dst = dstPixels8[yd * m_uiSourceWidth + xd]; + dst.ucR = (uint8_t)color.IntRed(255.0f); + dst.ucG = (uint8_t)color.IntGreen(255.0f); + dst.ucB = (uint8_t)color.IntBlue(255.0f); + dst.ucA = (uint8_t)color.IntAlpha(255.0f); + } + } + } + } + + delete encoderRG; + + return m_encodingStatus; + } + + // ---------------------------------------------------------------------------------------------------- + // return a string name for a given image format + // + const char * Image::EncodingFormatToString(Image::Format a_format) + { + switch (a_format) + { + case Image::Format::ETC1: + return "ETC1"; + case Image::Format::RGB8: + return "RGB8"; + case Image::Format::SRGB8: + return "SRGB8"; + + case Image::Format::RGB8A1: + return "RGB8A1"; + case Image::Format::SRGB8A1: + return "SRGB8A1"; + case Image::Format::RGBA8: + return "RGBA8"; + case Image::Format::SRGBA8: + return "SRGBA8"; + + case Image::Format::R11: + return "R11"; + case Image::Format::SIGNED_R11: + return "SIGNED_R11"; + + case Image::Format::RG11: + return "RG11"; + case Image::Format::SIGNED_RG11: + return "SIGNED_RG11"; + case Image::Format::FORMATS: + case Image::Format::UNKNOWN: + default: + return "UNKNOWN"; + } + } + + // ---------------------------------------------------------------------------------------------------- + // return a string name for the image's format + // + const char * Image::EncodingFormatToString(void) const + { + return EncodingFormatToString(m_format); + } + + // ---------------------------------------------------------------------------------------------------- + // determine the 
encoding bits format based on the encoding format + // the encoding bits format is a family of bit encodings that are shared across various encoding formats + // + Block4x4EncodingBits::Format Image::DetermineEncodingBitsFormat(Format a_format) + { + Block4x4EncodingBits::Format encodingbitsformat; + + // determine encoding bits format from image format + switch (a_format) + { + case Format::ETC1: + case Format::RGB8: + case Format::SRGB8: + encodingbitsformat = Block4x4EncodingBits::Format::RGB8; + break; + + case Format::RGBA8: + case Format::SRGBA8: + encodingbitsformat = Block4x4EncodingBits::Format::RGBA8; + break; + + case Format::R11: + case Format::SIGNED_R11: + encodingbitsformat = Block4x4EncodingBits::Format::R11; + break; + + case Format::RG11: + case Format::SIGNED_RG11: + encodingbitsformat = Block4x4EncodingBits::Format::RG11; + break; + + case Format::RGB8A1: + case Format::SRGB8A1: + encodingbitsformat = Block4x4EncodingBits::Format::RGB8A1; + break; + + default: + encodingbitsformat = Block4x4EncodingBits::Format::UNKNOWN; + break; + } + + return encodingbitsformat; + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcImage.h b/libkram/etc2comp/EtcImage.h index 83047e40..7c2e0232 100644 --- a/libkram/etc2comp/EtcImage.h +++ b/libkram/etc2comp/EtcImage.h @@ -1,203 +1,203 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "EtcColorFloatRGBA.h" -#include "EtcBlock4x4EncodingBits.h" -#include "EtcErrorMetric.h" - - -namespace Etc -{ - class Block4x4; - class EncoderSpec; - - class Image - { - public: - - enum EncodingStatus - { - SUCCESS = 0, - }; - - enum class Format - { - UNKNOWN, - // - ETC1, - // - // ETC2 formats - RGB8, - SRGB8, - RGBA8, - SRGBA8, - - R11, - SIGNED_R11, - RG11, - SIGNED_RG11, - - RGB8A1, - SRGB8A1, - // - FORMATS, - // - DEFAULT = SRGB8 - }; - - // constructor using source image - Image(Format a_format, const ColorR8G8B8A8 *a_pafSourceRGBA, - unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, - ErrorMetric a_errormetric); - - // constructor using encoding bits -// Image(Format a_format, -// unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, -// unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes, -// //Image *a_pimageSource, -// ErrorMetric a_errormetric); - - ~Image(void); - - // Multipass encoding. Uses tons of memory but can thread even though it doesn't help. - EncodingStatus Encode(float blockPercent, float a_fEffort, uint8_t* outputTexture); - - // Single-pass encoding. One block at a time to not was so much memory and time as Encode does. 
- EncodingStatus EncodeSinglepass(float a_fEffort, uint8_t* outputTexture); - - // Translate to rgba8unorm texture (even r/rg11) - EncodingStatus Decode(const uint8_t* etcBlocks, uint8_t* outputTexture); - - inline void AddToEncodingStatus(EncodingStatus a_encStatus) - { - m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus); - } - - inline unsigned int GetSourceWidth(void) const - { - return m_uiSourceWidth; - } - - inline unsigned int GetSourceHeight(void) const - { - return m_uiSourceHeight; - } - - inline unsigned int GetNumberOfBlocks() const - { - return m_uiBlockColumns * m_uiBlockRows; - } - - inline unsigned char * GetEncodingBits(void) - { - return m_paucEncodingBits; - } - - inline unsigned int GetEncodingBitsBytes(void) - { - return m_uiEncodingBitsBytes; - } - - inline int GetEncodingTimeMs(void) const - { - return m_iEncodeTime_ms; - } - - float GetError(void) const; - - inline bool HasSourcePixels() const - { - return m_pafrgbaSource != nullptr; - } - - inline ColorFloatRGBA GetSourcePixel(unsigned int x, unsigned int y) const - { - // clamp on border instead of returning nullptr and NaNs. Might weight color more. - if (x >= m_uiSourceWidth) - { - x = m_uiSourceWidth - 1; - } - if (y >= m_uiSourceHeight) - { - y = m_uiSourceHeight - 1; - } - - // Convert to float pixel here. This keeps input image much smaller. Only 8-bit data. - // But can't encode to R11 or R11G11 with full fp32 inputs. 
- return ColorFloatRGBA::ConvertFromRGBA8(m_pafrgbaSource[y * m_uiSourceWidth + x]); - } - - inline Format GetFormat(void) const - { - return m_format; - } - - static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format); - - inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension) - { - return (unsigned short)((a_ushOriginalDimension + 3) & ~3); - } - - inline ErrorMetric GetErrorMetric(void) const - { - return m_errormetric; - } - - static const char * EncodingFormatToString(Image::Format a_format); - - const char * EncodingFormatToString(void) const; - - void SetVerboseOutput(bool enabled) - { - m_bVerboseOutput = enabled; - } - bool GetVerboseOutput() const - { - return m_bVerboseOutput; - } - - private: - bool m_bVerboseOutput; - - - //Image(void); - - // inputs - const ColorR8G8B8A8 *m_pafrgbaSource; - unsigned int m_uiSourceWidth; - unsigned int m_uiSourceHeight; - unsigned int m_uiBlockColumns; - unsigned int m_uiBlockRows; - - // encoding - Format m_format; - Block4x4EncodingBits::Format m_encodingbitsformat; - unsigned int m_uiEncodingBitsBytes; // for entire image - unsigned char *m_paucEncodingBits; - ErrorMetric m_errormetric; - float m_fEffort; - - // stats - int m_iEncodeTime_ms; - - //this will hold any warning or errors that happen during encoding - EncodingStatus m_encodingStatus; - }; - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColorFloatRGBA.h" +#include "EtcBlock4x4EncodingBits.h" +#include "EtcErrorMetric.h" + + +namespace Etc +{ + class Block4x4; + class EncoderSpec; + + class Image + { + public: + + enum EncodingStatus + { + SUCCESS = 0, + }; + + enum class Format + { + UNKNOWN, + // + ETC1, + // + // ETC2 formats + RGB8, + SRGB8, + RGBA8, + SRGBA8, + + R11, + SIGNED_R11, + RG11, + SIGNED_RG11, + + RGB8A1, + SRGB8A1, + // + FORMATS, + // + DEFAULT = SRGB8 + }; + + // constructor using source image + Image(Format a_format, const ColorR8G8B8A8 *a_pafSourceRGBA, + unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, + ErrorMetric a_errormetric); + + // constructor using encoding bits +// Image(Format a_format, +// unsigned int a_uiSourceWidth, unsigned int a_uiSourceHeight, +// unsigned char *a_paucEncidingBits, unsigned int a_uiEncodingBitsBytes, +// //Image *a_pimageSource, +// ErrorMetric a_errormetric); + + ~Image(void); + + // Multipass encoding. Uses tons of memory but can thread even though it doesn't help. + EncodingStatus Encode(float blockPercent, float a_fEffort, uint8_t* outputTexture); + + // Single-pass encoding. One block at a time to not was so much memory and time as Encode does. 
+ EncodingStatus EncodeSinglepass(float a_fEffort, uint8_t* outputTexture); + + // Translate to rgba8unorm texture (even r/rg11) + EncodingStatus Decode(const uint8_t* etcBlocks, uint8_t* outputTexture); + + inline void AddToEncodingStatus(EncodingStatus a_encStatus) + { + m_encodingStatus = (EncodingStatus)((unsigned int)m_encodingStatus | (unsigned int)a_encStatus); + } + + inline unsigned int GetSourceWidth(void) const + { + return m_uiSourceWidth; + } + + inline unsigned int GetSourceHeight(void) const + { + return m_uiSourceHeight; + } + + inline unsigned int GetNumberOfBlocks() const + { + return m_uiBlockColumns * m_uiBlockRows; + } + + inline unsigned char * GetEncodingBits(void) + { + return m_paucEncodingBits; + } + + inline unsigned int GetEncodingBitsBytes(void) + { + return m_uiEncodingBitsBytes; + } + + inline int GetEncodingTimeMs(void) const + { + return m_iEncodeTime_ms; + } + + float GetError(void) const; + + inline bool HasSourcePixels() const + { + return m_pafrgbaSource != nullptr; + } + + inline ColorFloatRGBA GetSourcePixel(unsigned int x, unsigned int y) const + { + // clamp on border instead of returning nullptr and NaNs. Might weight color more. + if (x >= m_uiSourceWidth) + { + x = m_uiSourceWidth - 1; + } + if (y >= m_uiSourceHeight) + { + y = m_uiSourceHeight - 1; + } + + // Convert to float pixel here. This keeps input image much smaller. Only 8-bit data. + // But can't encode to R11 or R11G11 with full fp32 inputs. 
+ return ColorFloatRGBA::ConvertFromRGBA8(m_pafrgbaSource[y * m_uiSourceWidth + x]); + } + + inline Format GetFormat(void) const + { + return m_format; + } + + static Block4x4EncodingBits::Format DetermineEncodingBitsFormat(Format a_format); + + inline static unsigned short CalcExtendedDimension(unsigned short a_ushOriginalDimension) + { + return (unsigned short)((a_ushOriginalDimension + 3) & ~3); + } + + inline ErrorMetric GetErrorMetric(void) const + { + return m_errormetric; + } + + static const char * EncodingFormatToString(Image::Format a_format); + + const char * EncodingFormatToString(void) const; + + void SetVerboseOutput(bool enabled) + { + m_bVerboseOutput = enabled; + } + bool GetVerboseOutput() const + { + return m_bVerboseOutput; + } + + private: + bool m_bVerboseOutput; + + + //Image(void); + + // inputs + const ColorR8G8B8A8 *m_pafrgbaSource; + unsigned int m_uiSourceWidth; + unsigned int m_uiSourceHeight; + unsigned int m_uiBlockColumns; + unsigned int m_uiBlockRows; + + // encoding + Format m_format; + Block4x4EncodingBits::Format m_encodingbitsformat; + unsigned int m_uiEncodingBitsBytes; // for entire image + unsigned char *m_paucEncodingBits; + ErrorMetric m_errormetric; + float m_fEffort; + + // stats + int m_iEncodeTime_ms; + + //this will hold any warning or errors that happen during encoding + EncodingStatus m_encodingStatus; + }; + +} // namespace Etc diff --git a/libkram/etc2comp/EtcIndividualTrys.cpp b/libkram/etc2comp/EtcIndividualTrys.cpp index 20b463a1..77db49a9 100644 --- a/libkram/etc2comp/EtcIndividualTrys.cpp +++ b/libkram/etc2comp/EtcIndividualTrys.cpp @@ -1,89 +1,89 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -EtcIndividualTrys.cpp - -Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode - -*/ - -#include "EtcConfig.h" -#include "EtcIndividualTrys.h" - -#include - -namespace Etc -{ - - // ---------------------------------------------------------------------------------------------------- - // construct a list of trys (encoding attempts) - // - // a_frgbaColor1 is the basecolor for the first half - // a_frgbaColor2 is the basecolor for the second half - // a_pauiPixelMapping1 is the pixel order for the first half - // a_pauiPixelMapping2 is the pixel order for the second half - // a_uiRadius is the amount to vary the base colors - // - IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, - const unsigned int *a_pauiPixelMapping1, - const unsigned int *a_pauiPixelMapping2, - unsigned int a_uiRadius) - { - assert(a_uiRadius <= MAX_RADIUS); - - ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR4G4B4(); - ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR4G4B4(); - - // quantize base colors - // ensure that trys with a_uiRadius don't overflow - int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(15.0f), a_uiRadius); - int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(15.0f), a_uiRadius); - int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(15.0f), a_uiRadius); - - int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(15.0f), a_uiRadius); - int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(15.0f), 
a_uiRadius); - int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(15.0f), a_uiRadius); - - m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); - m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); - - } - - // ---------------------------------------------------------------------------------------------------- - // - void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, - const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) - { - - m_iRed = a_iRed; - m_iGreen = a_iGreen; - m_iBlue = a_iBlue; - - m_pauiPixelMapping = a_pauiPixelMapping; - m_uiRadius = a_uiRadius; - - m_uiTrys = 0; - m_ptryBest = nullptr; - - // not initialized - // m_atry - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* +EtcIndividualTrys.cpp + +Gathers the results of the various encoding trys for both halves of a 4x4 block for Individual mode + +*/ + +#include "EtcConfig.h" +#include "EtcIndividualTrys.h" + +#include + +namespace Etc +{ + + // ---------------------------------------------------------------------------------------------------- + // construct a list of trys (encoding attempts) + // + // a_frgbaColor1 is the basecolor for the first half + // a_frgbaColor2 is the basecolor for the second half + // a_pauiPixelMapping1 is the pixel order for the first half + // a_pauiPixelMapping2 is the pixel order for the second half + // a_uiRadius is the amount to vary the base colors + // + IndividualTrys::IndividualTrys(ColorFloatRGBA a_frgbaColor1, ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius) + { + assert(a_uiRadius <= MAX_RADIUS); + + ColorFloatRGBA frgbaQuantizedColor1 = a_frgbaColor1.QuantizeR4G4B4(); + ColorFloatRGBA frgbaQuantizedColor2 = a_frgbaColor2.QuantizeR4G4B4(); + + // quantize base colors + // ensure that trys with a_uiRadius don't overflow + int iRed1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntRed(15.0f), a_uiRadius); + int iGreen1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntGreen(15.0f), a_uiRadius); + int iBlue1 = MoveAwayFromEdge(frgbaQuantizedColor1.IntBlue(15.0f), a_uiRadius); + + int iRed2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntRed(15.0f), a_uiRadius); + int iGreen2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntGreen(15.0f), a_uiRadius); + int iBlue2 = MoveAwayFromEdge(frgbaQuantizedColor2.IntBlue(15.0f), a_uiRadius); + + m_half1.Init(iRed1, iGreen1, iBlue1, a_pauiPixelMapping1, a_uiRadius); + m_half2.Init(iRed2, iGreen2, iBlue2, a_pauiPixelMapping2, a_uiRadius); + + } + + // ---------------------------------------------------------------------------------------------------- + // + void IndividualTrys::Half::Init(int a_iRed, int a_iGreen, int a_iBlue, 
+ const unsigned int *a_pauiPixelMapping, unsigned int a_uiRadius) + { + + m_iRed = a_iRed; + m_iGreen = a_iGreen; + m_iBlue = a_iBlue; + + m_pauiPixelMapping = a_pauiPixelMapping; + m_uiRadius = a_uiRadius; + + m_uiTrys = 0; + m_ptryBest = nullptr; + + // not initialized + // m_atry + } + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcIndividualTrys.h b/libkram/etc2comp/EtcIndividualTrys.h index 49170d43..5fb12fbc 100644 --- a/libkram/etc2comp/EtcIndividualTrys.h +++ b/libkram/etc2comp/EtcIndividualTrys.h @@ -1,95 +1,95 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "EtcColorFloatRGBA.h" - -namespace Etc -{ - - class IndividualTrys - { - public: - - static const unsigned int MAX_RADIUS = 1; - - IndividualTrys(ColorFloatRGBA a_frgbaColor1, - ColorFloatRGBA a_frgbaColor2, - const unsigned int *a_pauiPixelMapping1, - const unsigned int *a_pauiPixelMapping2, - unsigned int a_uiRadius); - - inline static int MoveAwayFromEdge(int a_i, int a_iDistance) - { - if (a_i < (0+ a_iDistance)) - { - return (0 + a_iDistance); - } - else if (a_i > (15- a_iDistance)) - { - return (15 - a_iDistance); - } - - return a_i; - } - - class Try - { - public : - static const unsigned int SELECTORS = 8; // per half - - int m_iRed; - int m_iGreen; - int m_iBlue; - unsigned int m_uiCW; - unsigned int m_auiSelectors[SELECTORS]; - float m_fError; - }; - - class Half - { - public: - - static const unsigned int MAX_TRYS = 27; - - void Init(int a_iRed, int a_iGreen, int a_iBlue, - const unsigned int *a_pauiPixelMapping, - unsigned int a_uiRadius); - - // center of trys - int m_iRed; - int m_iGreen; - int m_iBlue; - - const unsigned int *m_pauiPixelMapping; - unsigned int m_uiRadius; - - unsigned int m_uiTrys; - Try m_atry[MAX_TRYS]; - - Try *m_ptryBest; - }; - - Half m_half1; - Half m_half2; - - }; - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "EtcColorFloatRGBA.h" + +namespace Etc +{ + + class IndividualTrys + { + public: + + static const unsigned int MAX_RADIUS = 1; + + IndividualTrys(ColorFloatRGBA a_frgbaColor1, + ColorFloatRGBA a_frgbaColor2, + const unsigned int *a_pauiPixelMapping1, + const unsigned int *a_pauiPixelMapping2, + unsigned int a_uiRadius); + + inline static int MoveAwayFromEdge(int a_i, int a_iDistance) + { + if (a_i < (0+ a_iDistance)) + { + return (0 + a_iDistance); + } + else if (a_i > (15- a_iDistance)) + { + return (15 - a_iDistance); + } + + return a_i; + } + + class Try + { + public : + static const unsigned int SELECTORS = 8; // per half + + int m_iRed; + int m_iGreen; + int m_iBlue; + unsigned int m_uiCW; + unsigned int m_auiSelectors[SELECTORS]; + float m_fError; + }; + + class Half + { + public: + + static const unsigned int MAX_TRYS = 27; + + void Init(int a_iRed, int a_iGreen, int a_iBlue, + const unsigned int *a_pauiPixelMapping, + unsigned int a_uiRadius); + + // center of trys + int m_iRed; + int m_iGreen; + int m_iBlue; + + const unsigned int *m_pauiPixelMapping; + unsigned int m_uiRadius; + + unsigned int m_uiTrys; + Try m_atry[MAX_TRYS]; + + Try *m_ptryBest; + }; + + Half m_half1; + Half m_half2; + + }; + + // ---------------------------------------------------------------------------------------------------- + // + +} // namespace Etc diff --git a/libkram/etc2comp/EtcMath.cpp b/libkram/etc2comp/EtcMath.cpp index cd70a9ab..096d5f7a 100644 --- a/libkram/etc2comp/EtcMath.cpp +++ b/libkram/etc2comp/EtcMath.cpp @@ -1,64 +1,64 @@ -/* - * Copyright 2015 The Etc2Comp Authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "EtcConfig.h" -#include "EtcMath.h" - -namespace Etc -{ - - // ---------------------------------------------------------------------------------------------------- - // calculate the line that best fits the set of XY points contained in a_afX[] and a_afY[] - // use a_fSlope and a_fOffset to define that line - // - bool Regression(float a_afX[], float a_afY[], unsigned int a_Points, - float *a_fSlope, float *a_fOffset) - { - float fPoints = (float)a_Points; - - float fSumX = 0.0f; - float fSumY = 0.0f; - float fSumXY = 0.0f; - float fSumX2 = 0.0f; - - for (unsigned int uiPoint = 0; uiPoint < a_Points; uiPoint++) - { - fSumX += a_afX[uiPoint]; - fSumY += a_afY[uiPoint]; - fSumXY += a_afX[uiPoint] * a_afY[uiPoint]; - fSumX2 += a_afX[uiPoint] * a_afX[uiPoint]; - } - - float fDivisor = fPoints*fSumX2 - fSumX*fSumX; - - // if vertical line - if (fDivisor == 0.0f) - { - *a_fSlope = 0.0f; - *a_fOffset = 0.0f; - return true; - } - - *a_fSlope = (fPoints*fSumXY - fSumX*fSumY) / fDivisor; - *a_fOffset = (fSumY - (*a_fSlope)*fSumX) / fPoints; - - return false; - } - - // ---------------------------------------------------------------------------------------------------- - // - -} // namespace Etc +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
namespace Etc
{

    // ----------------------------------------------------------------------------------------------------
    // Least-squares line fit through the a_Points points (a_afX[i], a_afY[i]).
    //
    // On success the line is written to *a_fSlope / *a_fOffset and false is returned.
    // When the divisor of the fit is zero (vertical line) both outputs are set to 0 and
    // true is returned.
    //
    bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
                    float *a_fSlope, float *a_fOffset)
    {
        float fTotalX = 0.0f;
        float fTotalY = 0.0f;
        float fTotalXY = 0.0f;
        float fTotalXX = 0.0f;

        for (unsigned int i = 0; i != a_Points; ++i)
        {
            const float fX = a_afX[i];
            const float fY = a_afY[i];

            fTotalX += fX;
            fTotalY += fY;
            fTotalXY += fX * fY;
            fTotalXX += fX * fX;
        }

        const float fCount = static_cast<float>(a_Points);
        const float fDenominator = fCount*fTotalXX - fTotalX*fTotalX;

        // all x equal: no finite slope exists
        if (fDenominator == 0.0f)
        {
            *a_fSlope = 0.0f;
            *a_fOffset = 0.0f;
            return true;
        }

        *a_fSlope = (fCount*fTotalXY - fTotalX*fTotalY) / fDenominator;
        *a_fOffset = (fTotalY - (*a_fSlope)*fTotalX) / fCount;

        return false;
    }

    // ----------------------------------------------------------------------------------------------------
    //

} // namespace Etc
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace Etc -{ - - // ---------------------------------------------------------------------------------------------------- - // return true if vertical line - bool Regression(float a_afX[], float a_afY[], unsigned int a_Points, - float *a_fSlope, float *a_fOffset); - - inline float ConvertMSEToPSNR(float a_fMSE) - { - if (a_fMSE == 0.0f) - { - return INFINITY; - } - - return 10.0f * log10f(1.0f / a_fMSE); - } - - -} +/* + * Copyright 2015 The Etc2Comp Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
namespace Etc
{

    // ----------------------------------------------------------------------------------------------------
    // return true if vertical line
    bool Regression(float a_afX[], float a_afY[], unsigned int a_Points,
                    float *a_fSlope, float *a_fOffset);

    // Converts a mean squared error to 10 * log10(1 / MSE).
    // An error of exactly zero yields INFINITY.
    inline float ConvertMSEToPSNR(float a_fMSE)
    {
        const bool bNoError = (a_fMSE == 0.0f);
        return bNoError ? INFINITY : 10.0f * log10f(1.0f / a_fMSE);
    }

}
namespace fastl
{
    // NOTE: every call from one fastl algorithm to another is qualified with fastl::.
    // An unqualified call is resolved with argument dependent lookup as well, so with
    // iterators that belong to namespace std (e.g. std::vector<T>::iterator) it becomes
    // ambiguous with the std:: algorithm of the same name.

    //------------------------------------------------------------------------------------------
    // Returns the first position in [first, last) for which p is true, or last if there is none.
    template <typename Iterator, typename Predicate>
    constexpr Iterator find_if(Iterator first, Iterator last, Predicate p)
    {
        for (; first != last; ++first)
        {
            if (p(*first)) return first;
        }
        return last;
    }

    //------------------------------------------------------------------------------------------
    // Returns the first position in [first, last) that compares equal to value, or last.
    template <typename Iterator, typename T>
    constexpr Iterator find(Iterator first, Iterator last, const T& value)
    {
        // nothing is modified during the search, so value can be referenced instead of copied
        return fastl::find_if(first, last, [&value](const T& input) { return input == value; });
    }

    //------------------------------------------------------------------------------------------
    // Moves every element for which p is false to the front of [first, last), keeping their
    // order, and returns the new logical end. Elements behind the returned position are left
    // in a moved-from state.
    // std::move is expected to arrive through ../fastl/vector.h (its include list is
    // annotated "for move") - TODO confirm.
    template <typename Iterator, typename Predicate>
    Iterator remove_if(Iterator first, Iterator last, Predicate p)
    {
        first = fastl::find_if(first, last, p);
        if (first != last)
        {
            for (Iterator i = first; ++i != last; )
            {
                // move-assign so that move-only element types work and copies are avoided
                if (!p(*i)) *first++ = std::move(*i);
            }
        }
        return first;
    }

    //------------------------------------------------------------------------------------------
    // Same as remove_if for all elements that compare equal to value.
    template <class Iterator, class T>
    Iterator remove(Iterator first, Iterator last, const T& value)
    {
        // value is captured by copy on purpose: it may refer to an element of the range,
        // which gets overwritten while the range is compacted
        return fastl::remove_if(first, last, [value](const T& input) { return input == value; });
    }

    //------------------------------------------------------------------------------------------
    // Binary search: returns the first position in the sorted range [first, last) for which
    // comp(element, value) is false, or last if there is none.
    template <typename Iterator, typename T, typename Compare>
    Iterator lower_bound(Iterator first, Iterator last, const T& value, Compare comp)
    {
        //specific implementation for contiguous memory ( vectors )
        size_t count = static_cast<size_t>(last - first);
        while (count > 0)
        {
            const size_t step = count / 2;
            Iterator it = first;
            it += step;
            if (comp(*it, value))
            {
                // everything up to and including it is smaller, continue in the upper half
                first = ++it;
                count -= step + 1;
            }
            else
            {
                count = step;
            }
        }
        return first;
    }

    //------------------------------------------------------------------------------------------
    // lower_bound using operator< of T.
    template <typename Iterator, typename T>
    Iterator lower_bound(Iterator first, Iterator last, const T& value)
    {
        return fastl::lower_bound(first, last, value, [](const T& lhs, const T& rhs) { return lhs < rhs; });
    }
}
? -1 : 1; + if (a[i] == '\0') + return 0; + } + } + + //////////////////////////////////////////////////////////////////////////////////////////// + // Build string as a vector + template + class StringImpl + { + private: + typedef vector TData; + public: + typedef TChar value_type; + typedef typename TData::size_type size_type; + + static constexpr size_type npos = -1; + public: + StringImpl(); + StringImpl(const TChar* input); + StringImpl(const TChar* input, size_type length); + + void clear(); + + bool empty() const { return size() == 0u; } + size_type size() const { return m_data.empty() ? 0 : m_data.size() - 1; } + size_type length() const { return size(); } + + TChar* begin() { return m_data.begin(); } + const TChar* begin() const { return m_data.begin(); } + + // this exludes the \0 + TChar* end() { return m_data.end() - 1; } + const TChar* end() const { return m_data.end() - 1; } + + TChar front() const { return *begin(); } + TChar back() const { return *end(); } + + const value_type* c_str() const { return m_data.begin(); } + + TChar& operator[](size_type index) { return m_data[index]; } + TChar operator[](size_type index) const { return m_data[index]; } + + StringImpl& erase(size_type index){ m_data.erase(m_data.begin()+index); return *this; } + StringImpl& erase(size_type index, size_type count){ m_data.erase(m_data.begin()+index,m_data.begin()+index+count); return *this; } + + void append(const TChar* str ); + + StringImpl operator+(const TChar c); + StringImpl operator+(const TChar* str); + StringImpl operator+(const StringImpl& str); + + StringImpl& operator += (TChar c) { m_data.insert(m_data.end()-1,c); return *this; } + StringImpl& operator += (const TChar* str) { Append(str,ComputeStrLen(str)); return *this; } + StringImpl& operator += (const StringImpl& str) { Append(str.c_str(), str.size()); return *this; } + + bool operator == (const TChar* str) const { return ComputeStrCmp(c_str(), str) == 0; } + bool operator != (const TChar* str) const { 
return ComputeStrCmp(c_str(), str) != 0; } + bool operator < (const TChar* str) const { return ComputeStrCmp(c_str(), str) < 0; } + bool operator > (const TChar* str) const { return ComputeStrCmp(c_str(), str) > 0; } + + bool operator == (const StringImpl& str) const { return *this == str.c_str(); } + bool operator != (const StringImpl& str) const { return *this != str.c_str(); } + bool operator < (const StringImpl& str) const { return *this < str.c_str(); } + bool operator > (const StringImpl& str) const { return *this > str.c_str(); } + + bool find_last_of(TChar c) + { + return strrchr(m_data.data(), c); + } + + StringImpl substr(size_type start, size_type count) + { + return StringImpl(&m_data[start], count); + } + + void pop_back() + { + if (!empty()) + { + // This doesn't work for multibyte chars + m_data.pop_back(); + m_data[m_data.size()-1] = (TChar)0; + } + } + + void insert(size_type index, const TChar* str) + { + size_type len = ComputeStrLen(str); + m_data.insert(m_data.begin()+index, str, str+len); + } + + void resize(size_type size, TChar value = 0) + { + size_type oldSize = m_data.size(); + size_type newSize = size+1; + if (newSize == oldSize) + return; + + m_data.resize(newSize); + + // Note: length and strlen with value of 0 unless those chars are filled + if (newSize > oldSize) + { + for (uint32_t i = oldSize-1; i < newSize; ++i) + { + m_data[i] = value; + } + } + m_data[newSize-1] = 0; + } + + private: + void Append(const TChar* str, const size_type appendSize); + + private: + TData m_data; + }; + + //Implementation + + //------------------------------------------------------------------------------------------ + template + StringImpl::StringImpl() + { + // TODO: this requires a heap allocate for all empty strings + m_data.reserve(1); + clear(); + } + + //------------------------------------------------------------------------------------------ + template + StringImpl::StringImpl(const TChar* input) + { + size_t length = ComputeStrLen(input); + 
m_data.reserve(length + 1); + clear(); + Append(input, length); + } + + //------------------------------------------------------------------------------------------ + template + StringImpl::StringImpl(const TChar* input, const size_type length) + { + m_data.reserve(length + 1); + clear(); + Append(input, length); + } + + //------------------------------------------------------------------------------------------ + template + inline void StringImpl::clear() + { + // need small string optimization + m_data.resize(1); + m_data[0] = '\0'; + } + + //------------------------------------------------------------------------------------------ + template + void StringImpl::append( const TChar* str ) + { + Append(str, ComputeStrLen(str)); + } + + //------------------------------------------------------------------------------------------ + template + StringImpl StringImpl::operator+(TChar c) + { + StringImpl ret; + ret.reserve(m_data.size() + 1); + + char cstr[2] = { c, 0 }; + ret.Append(c_str(), size()); + ret.Append(cstr, 1); + return ret; + } + //------------------------------------------------------------------------------------------ + template + StringImpl StringImpl::operator+(const TChar* str) + { + StringImpl ret; + size_t len = ComputeStrLen(str); + ret.reserve(m_data.size() + len); + + ret.Append(c_str(), size()); + ret.Append(str, len); + return ret; + } + + //------------------------------------------------------------------------------------------ + template + StringImpl StringImpl::operator+(const StringImpl& str) + { + StringImpl ret; + size_t len = str.size(); + ret.reserve(m_data.size() + len); + + ret.Append(c_str(), size()); + ret.Append(str, len); + return ret; + } + + //------------------------------------------------------------------------------------------ + template + void StringImpl::Append(const TChar* str, const size_type appendSize) + { + size_type writeIndex = size(); + m_data.resize(m_data.size()+appendSize); + for (size_type i = 0; i < 
appendSize; ++i, ++writeIndex) + { + m_data[writeIndex] = str[i]; + } + m_data.back() = '\0'; + } + + using string = StringImpl; + + // Code above is using char* in many places instead of TChar + // TODO: elim wstring if possible + // using wstring = StringImpl; +} + +#else + +//#include +// +//namespace fastl +//{ +// using string = std::string; +// using wstring = std::wstring; +//} + +#endif //USE_FASTL + +#ifdef FASTL_EXPOSE_PLAIN_ALIAS + +using string = fastl::string; +using wstring = fastl::wstring; + +#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fastl/map.h b/libkram/fastl/map.h new file mode 100644 index 00000000..aeee6bfd --- /dev/null +++ b/libkram/fastl/map.h @@ -0,0 +1,134 @@ +#pragma once + +#ifdef USE_FASTL + +#include "../fastl/vector.h" +#include "../fastl/pair.h" +#include "../fastl/falgorithm.h" + +namespace fastl +{ + //////////////////////////////////////////////////////////////////////////////////////////// + // Build map as a vectorMap + template + class map + { + private: + typedef vector> TData; + + public: + typedef typename TData::iterator iterator; + typedef typename TData::const_iterator const_iterator; + typedef typename TData::value_type value_type; + typedef typename TData::size_type size_type; + typedef value_type& reference; + typedef const value_type& const_reference; + + public: + iterator begin() { return m_data.begin(); } + const_iterator begin() const { return m_data.begin(); } + iterator end() { return m_data.end(); } + const_iterator end() const { return m_data.end(); } + + bool empty() const { return m_data.empty(); } + size_type size() const { return m_data.size(); } + + TValue& operator[]( const TKey& key ); + + void clear() { m_data.clear(); } + + iterator insert(iterator hint, const value_type& value) { return m_data.insert(hint, value); } + iterator insert(const_iterator hint, const value_type& value) { return m_data.insert(hint, value); } + pair insert( value_type&& value ); + + void erase(iterator it) { 
m_data.erase(it); } + size_type erase(const TKey& key); + + iterator find( const TKey& key ); + const_iterator find( const TKey& key ) const; + + private: + TData m_data; + }; + + // Implementation + + //------------------------------------------------------------------------------------------ + template TValue& map::operator[]( const TKey& key ) + { + iterator entryIt = fastl::lower_bound(begin(), end(), key, [=](value_type& value, const TKey& key) {return value.first < key; }); + if (entryIt == end() || entryIt->first != key) + { + // TODO: this is expensive to insertion sort into a vector + // This causes all elements above to have to be copied and they don't have constant addresses. + // Also keys for unordered_map/set only provide == and hash, and not < + + entryIt = m_data.emplace(entryIt,key,TValue()); + } + + return entryIt->second; + } + + //------------------------------------------------------------------------------------------ + template pair::iterator,bool> map::insert( value_type&& inputValue ) + { + iterator entryIt = fastl::lower_bound(begin(), end(), inputValue, [=](value_type& a, const value_type& b) {return a.first < b.first; }); + if (entryIt == end() || entryIt->first != inputValue.first) + { + // TODO: this is expensive to insertion sort into a vector + // This causes all elements above to have to be copied and they don't have constant addresses. + // Also keys for unordered_map/set only provide == and hash, and not < + + entryIt = m_data.emplace(entryIt,move(inputValue)); + return pair(entryIt,true); + } + return pair(entryIt,false); + } + + //------------------------------------------------------------------------------------------ + template typename map::size_type map::erase(const TKey& key) + { + iterator found = find(key); + if (found != end()) + { + // TODO: this is expensive to erase an element from a vector + // This causes all elements above to have to be copied and they don't have constant addresses. 
namespace fastl
{
    // Minimal stand-in for std::pair: two public members, first and second.
    template <typename T1, typename T2>
    struct pair
    {
        typedef T1 first_type;
        typedef T2 second_type;

        // value-initializes both members
        pair()
            :first(),second(){}
        pair(const T1& _first, const T2& _second)
            :first(_first),second(_second) {}

        // Copy and move are member-wise, so the compiler generated versions are used.
        // They need no std::move (this header includes nothing) and are noexcept
        // whenever the members allow it.
        pair(const pair& rhs) = default;
        pair(pair&& rhs) = default;

        pair& operator=(const pair& rhs) = default;
        pair& operator=(pair&& rhs) = default;

        T1 first;
        T2 second;
    };

    // Builds a pair holding copies of k and v.
    template <typename T1, typename T2>
    pair<T1, T2> make_pair(const T1& k, const T2& v) { return pair<T1, T2>(k,v); }
}
} +} + +#else + +//#include +// +//namespace fastl +//{ +// template using pair = std::pair; +//} + +#endif //USE_FASTL + +#ifdef FASTL_EXPOSE_PLAIN_ALIAS + +template using pair = fastl::pair; + +#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fastl/set.h b/libkram/fastl/set.h new file mode 100644 index 00000000..d964050f --- /dev/null +++ b/libkram/fastl/set.h @@ -0,0 +1,114 @@ +#pragma once + +//#ifdef USE_FASTL + +#include "../fastl/falgorithm.h" +#include "../fastl/pair.h" +#include "../fastl/vector.h" + +namespace fastl +{ + //////////////////////////////////////////////////////////////////////////////////////////// + // Build map as a vectorMap + template + class set + { + private: + typedef vector TData; + + public: + typedef typename TData::iterator iterator; + typedef typename TData::const_iterator const_iterator; + typedef typename TData::value_type value_type; + typedef typename TData::size_type size_type; + typedef value_type& reference; + typedef const value_type& const_reference; + + public: + iterator begin() { return m_data.begin(); } + const_iterator begin() const { return m_data.begin(); } + iterator end() { return m_data.end(); } + const_iterator end() const { return m_data.end(); } + + bool empty() const { return m_data.empty(); } + size_type size() const { return m_data.size(); } + + void clear() { m_data.clear(); } + + template< class... Args > pair emplace( Args&&... args ); + + void erase( iterator it ) { m_data.erase( it ); } + size_type erase( const TKey& key ); + + iterator find( const TKey& key ); + const_iterator find( const TKey& key ) const; + + private: + TData m_data; + }; + + // Implementation + + //------------------------------------------------------------------------------------------ + template + template pair::iterator, bool> set::emplace( Args&&... args ) + { + TKey inputValue{ args... 
}; + iterator entryIt = fastl::lower_bound( begin(), end(), inputValue, [=]( value_type& a, const value_type& b ) {return a < b; } ); + if( entryIt == end() || *entryIt != inputValue ) + { + // TODO: this isn't fast to emplace into a vector, all elements above shift + // and addresses are no longer constant on elements + + entryIt = m_data.emplace( entryIt, args... ); + return pair( entryIt, true ); + } + return pair( entryIt, false ); + } + + //------------------------------------------------------------------------------------------ + template typename set::size_type set::erase( const TKey& key ) + { + iterator found = find( key ); + if( found != end() ) + { + // TODO: this isn't fast to erase from a vector, all elements above shift + // and addresses are no longer constant on elements + + erase( found ); + } + return size(); + } + + //------------------------------------------------------------------------------------------ + template typename set::iterator set::find( const TKey& key ) + { + iterator found = fastl::lower_bound( begin(), end(), key, [=]( const TKey& value, const TKey& key ) {return value < key; } ); + return found != end() && *found == key ? found : end(); + } + + //------------------------------------------------------------------------------------------ + template typename set::const_iterator set::find( const TKey& key ) const + { + const_iterator found = fastl::lower_bound( begin(), end(), key, [=]( const TKey& value, const TKey& key ) {return value < key; } ); + return found != end() && *found == key ? 
found : end(); + } + +} + +//#else +// +//#include +// +//namespace fastl +//{ +// template using set = std::set; +//} +// +//#endif //USE_FASTL +// +//#ifdef FASTL_EXPOSE_PLAIN_ALIAS +// +//template using set = fastl::set; +// +//#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fastl/unordered_map.h b/libkram/fastl/unordered_map.h new file mode 100644 index 00000000..1b312a1b --- /dev/null +++ b/libkram/fastl/unordered_map.h @@ -0,0 +1,28 @@ +#pragma once + +#ifdef USE_FASTL + +#include "../fastl/map.h" + +namespace fastl +{ + // Build unordered_map as a map + template using unordered_map = fastl::map; +} + +#else + +//#include +// +//namespace fastl +//{ +// template using unordered_map = std::unordered_map; +//} + +#endif //USE_FASTL + +#ifdef FASTL_EXPOSE_PLAIN_ALIAS + +template using unordered_map = fastl::unordered_map; + +#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fastl/unordered_set.h b/libkram/fastl/unordered_set.h new file mode 100644 index 00000000..f2aed10d --- /dev/null +++ b/libkram/fastl/unordered_set.h @@ -0,0 +1,28 @@ +#pragma once + +#ifdef USE_FASTL + +#include "../fastl/set.h" + +namespace fastl +{ + // Build unordered_map as a map + template using unordered_set = fastl::set; +} + +#else + +//#include +// +//namespace fastl +//{ +// template using unordered_set = std::unordered_set; +//} + +#endif //USE_FASTL + +#ifdef FASTL_EXPOSE_PLAIN_ALIAS + +template using unordered_set = fastl::unordered_set; + +#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fastl/vector.h b/libkram/fastl/vector.h new file mode 100644 index 00000000..b07b041f --- /dev/null +++ b/libkram/fastl/vector.h @@ -0,0 +1,404 @@ +#pragma once + +#ifdef USE_FASTL + +#include // for size_t +#include // for placement new +#include // for move + +//Forward declare the placement new in order to avoid #include +//extern void* operator new (size_t size, void* ptr) noexcept; + +namespace fastl +{ + 
//------------------------------------------------------------------------------------------ + //Consider moving this around if needed somewhere else + template + struct remove_reference { typedef T type; }; + template + struct remove_reference { typedef T type; }; + template + struct remove_reference { typedef T type; }; + + // This is ambigous if included + //template typename remove_reference::type&& move(T&& arg) { return static_cast::type&&>(arg); } + + template + struct enable_if {}; + template + struct enable_if { typedef T type; }; + template + using enable_if_t = typename enable_if::type; + + template + void Construct(T* ptr, Args&&... args) { new (ptr) T(std::move(args)...); } + + template + T* CreateBuffer(size_t size){ return (T*) new char[size*sizeof(T)]; } + template + void DestroyBuffer(T* buffer){ delete[] reinterpret_cast(buffer); } + + //////////////////////////////////////////////////////////////////////////////////////////// + template + class vector + { + private: + enum { DEFAULT_CAPACITY_SIZE = 8 }; + public: + typedef T value_type; + typedef size_t size_type; + + typedef T* iterator; + typedef const T* const_iterator; + typedef T& reference; + typedef const T& const_reference; + + public: + vector(); + explicit vector(size_t size); + + //If more than 1 argument is provided we assume that we want to construct the vector with its elements ( using SFINAE - fake initializer list ) + template 1)>* = nullptr> + vector(Args&&... 
args) : m_data(CreateBuffer(sizeof...(Args))), m_size(0u), m_capacity(sizeof...(Args)) + { + (emplace_back(args),...); + } + + vector(const vector& input); + vector(vector&& input); + ~vector(); + + vector& operator = (const vector& t); + vector& operator = (vector&& t); + + reference operator[](size_type index) { return m_data[index]; } + const_reference operator[](size_type index) const { return m_data[index]; } + + size_type size() const{ return m_size; } + size_type capacity() const { return m_capacity; } + + iterator begin() { return m_data; } + const_iterator begin() const { return m_data; } + iterator end() { return m_data+m_size; } + const_iterator end() const { return m_data+m_size; } + + // TOOD: need front + reference back() { return m_data[m_size-1]; } + + bool empty() const { return m_size == 0u; } + + void reserve(const size_type size); + void resize(const size_type size); + void clear(); + + void push_back(const value_type& value); + + iterator insert(iterator it, const value_type& value); + void insert(iterator it, const value_type* beg, const value_type* en) + { + size_type len = en - beg; + reserve(size() + len); + + // TODO: fix this isn't fast, since it has to shift all elements above + // the iterator. Do that once. + while (beg != en) + { + insert(it, *beg); + + ++it; + ++beg; + } + } + + template + iterator emplace(iterator it, Args&&... args); + + template + void emplace_back(Args&&... 
args); + + void pop_back(); + + iterator erase(iterator it); + iterator erase(iterator fromIt,iterator toIt); + + const value_type* data() const { return m_data; } + value_type* data() { return m_data; } + + // TODO: no-op for now, but should copy and release memory + void shrink_to_fit() { } + + void swap(vector& rhs) + { + if (this == &rhs) return; + std::swap(m_data, rhs.m_data); + std::swap(m_size, rhs.m_size); + std::swap(m_capacity, rhs.m_capacity); + } + + private: + void Destroy(); + + private: + value_type* m_data; + + // TODO: could map size_type to int32_t + size_type m_size; + size_type m_capacity; + }; + + //Implementation + + //------------------------------------------------------------------------------------------ + template + vector::vector() + : m_data(nullptr) + , m_size(0u) + , m_capacity(0u) + { + } + + //------------------------------------------------------------------------------------------ + template + vector::vector(size_t size) + : m_data(CreateBuffer(size)) + , m_size(size) + , m_capacity(size) + { + //Call the default constructor for all preallocated elements + for (size_type i = 0u; i < m_size; ++i) + { + Construct(&m_data[i]); + } + } + + //------------------------------------------------------------------------------------------ + template vector::vector(const vector& input) + : m_data(CreateBuffer(input.m_capacity)) + , m_size(input.m_size) + , m_capacity(input.m_capacity) + { + for (size_t i = 0u; i < m_size; ++i) + { + Construct(&m_data[i]); + m_data[i] = input[i]; + } + } + + //------------------------------------------------------------------------------------------ + template + vector::vector(vector&& input) + : m_data(std::move(input.m_data)) + , m_size(input.m_size) + , m_capacity(input.m_capacity) + { + input.m_data = nullptr; + input.m_size = 0u; + input.m_capacity = 0u; + } + + //------------------------------------------------------------------------------------------ + template + vector::~vector() + { + Destroy(); + } 
+ + //------------------------------------------------------------------------------------------ + template + inline vector& vector::operator= (const vector& input) + { + clear(); + reserve(input.m_capacity); + m_size = input.m_size; + for (size_type i = 0u; i < m_size; ++i) + { + Construct(&m_data[i], input[i]); + } + return *this; + } + + //------------------------------------------------------------------------------------------ + template + vector& vector::operator = (vector&& t) + { + if (this != &t) + { + Destroy(); + m_data = std::move(t.m_data); + m_size = t.m_size; + m_capacity = t.m_capacity; + t.m_data = nullptr; + t.m_size = 0u; + t.m_capacity = 0u; + } + return *this; + } + + //------------------------------------------------------------------------------------------ + template + inline void vector::reserve(const size_type size) + { + if (size > m_capacity) + { + m_capacity = size; + T* newData = CreateBuffer(m_capacity); + + for (size_type i = 0u; i < m_size; ++i) + { + Construct(&newData[i], std::move(m_data[i])); + m_data[i].~T(); + } + + DestroyBuffer(m_data); + m_data = newData; + + } + } + + //------------------------------------------------------------------------------------------ + template + inline void vector::resize(const size_type size) + { + reserve(size); + + for (size_type i=size;i(&m_data[i]); + } + + m_size = size; + } + + //------------------------------------------------------------------------------------------ + template + inline void vector::clear() + { + resize(0u); + } + + //------------------------------------------------------------------------------------------ + template + inline void vector::push_back(const value_type& value) + { + emplace(end(), value); + } + + //------------------------------------------------------------------------------------------ + template + inline typename vector::iterator vector::insert(iterator it,const value_type& value) + { + return emplace(it, value); + } + + 
//------------------------------------------------------------------------------------------ + template template + void vector::emplace_back(Args&&... args) + { + emplace(end(),std::move(args)...); + } + + //------------------------------------------------------------------------------------------ + template + inline void vector::pop_back() + { + if (!empty()) + { + resize(m_size-1); + } + } + + //------------------------------------------------------------------------------------------ + template template + typename vector::iterator vector::emplace(iterator it, Args&&... args) + { + const size_type index = it-begin(); + + if (m_size == m_capacity) + { + reserve(m_capacity == 0u? DEFAULT_CAPACITY_SIZE : 2u*m_capacity); + } + + iterator insertIt = begin() + index; //this is important as reserve might move the memory around + iterator endIt = end(); + + if(endIt == insertIt) + { + Construct(insertIt, std::move(args)...); + } + else + { + //Build the new element + Construct(end(), std::move(*(end()-1))); + + //Shift remaining elements + for (iterator i = end()-1; i > insertIt;--i) + { + *i = std::move(*(i-1)); + } + + *insertIt = T(std::move(args)...); + } + + ++m_size; + + return insertIt; + } + + //------------------------------------------------------------------------------------------ + template + inline typename vector::iterator vector::erase(iterator it) + { + return erase(it,it+1); + } + + //------------------------------------------------------------------------------------------ + template + inline typename vector::iterator vector::erase(iterator fromIt, iterator toIt) + { + const size_type rangeSize = toIt-fromIt; + const_iterator batchEndIt = end()-rangeSize; + + for (iterator i = fromIt; i < batchEndIt; ++i) + { + *i = std::move(*(i + rangeSize)); + } + + resize(m_size - rangeSize); + return fromIt; + } + + //------------------------------------------------------------------------------------------ + template + void vector::Destroy() + { + for (size_type 
i=0u;i +// +//namespace fastl +//{ +// template using vector = std::vector; +//} + +#endif //USE_FASTL + + +#ifdef FASTL_EXPOSE_PLAIN_ALIAS + +template using vector = fastl::vector; + +#endif //FASTL_EXPOSE_PLAIN_ALIAS diff --git a/libkram/fmt/LICENSE.rst b/libkram/fmt/LICENSE.rst new file mode 100644 index 00000000..f0ec3db4 --- /dev/null +++ b/libkram/fmt/LICENSE.rst @@ -0,0 +1,27 @@ +Copyright (c) 2012 - present, Victor Zverovich + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +--- Optional exception to the license --- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into a machine-executable object form of such +source code, you may redistribute such embedded portions in such object form +without including the above copyright and permission notices. 
diff --git a/libkram/fmt/args.h b/libkram/fmt/args.h new file mode 100644 index 00000000..a3966d14 --- /dev/null +++ b/libkram/fmt/args.h @@ -0,0 +1,234 @@ +// Formatting library for C++ - dynamic format arguments +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_ARGS_H_ +#define FMT_ARGS_H_ + +#include // std::reference_wrapper +#include // std::unique_ptr +#include + +#include "core.h" + +FMT_BEGIN_NAMESPACE + +namespace detail { + +template struct is_reference_wrapper : std::false_type {}; +template +struct is_reference_wrapper> : std::true_type {}; + +template const T& unwrap(const T& v) { return v; } +template const T& unwrap(const std::reference_wrapper& v) { + return static_cast(v); +} + +class dynamic_arg_list { + // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for + // templates it doesn't complain about inability to deduce single translation + // unit for placing vtable. So storage_node_base is made a fake template. + template struct node { + virtual ~node() = default; + std::unique_ptr> next; + }; + + template struct typed_node : node<> { + T value; + + template + FMT_CONSTEXPR typed_node(const Arg& arg) : value(arg) {} + + template + FMT_CONSTEXPR typed_node(const basic_string_view& arg) + : value(arg.data(), arg.size()) {} + }; + + std::unique_ptr> head_; + + public: + template const T& push(const Arg& arg) { + auto new_node = std::unique_ptr>(new typed_node(arg)); + auto& value = new_node->value; + new_node->next = std::move(head_); + head_ = std::move(new_node); + return value; + } +}; +} // namespace detail + +/** + \rst + A dynamic version of `fmt::format_arg_store`. + It's equipped with a storage to potentially temporary objects which lifetimes + could be shorter than the format arguments object. 
+ + It can be implicitly converted into `~fmt::basic_format_args` for passing + into type-erased formatting functions such as `~fmt::vformat`. + \endrst + */ +template +class dynamic_format_arg_store +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 + // Workaround a GCC template argument substitution bug. + : public basic_format_args +#endif +{ + private: + using char_type = typename Context::char_type; + + template struct need_copy { + static constexpr detail::type mapped_type = + detail::mapped_type_constant::value; + + enum { + value = !(detail::is_reference_wrapper::value || + std::is_same>::value || + std::is_same>::value || + (mapped_type != detail::type::cstring_type && + mapped_type != detail::type::string_type && + mapped_type != detail::type::custom_type)) + }; + }; + + template + using stored_type = conditional_t< + std::is_convertible>::value && + !detail::is_reference_wrapper::value, + std::basic_string, T>; + + // Storage of basic_format_arg must be contiguous. + std::vector> data_; + std::vector> named_info_; + + // Storage of arguments not fitting into basic_format_arg must grow + // without relocation because items in data_ refer to it. + detail::dynamic_arg_list dynamic_args_; + + friend class basic_format_args; + + unsigned long long get_types() const { + return detail::is_unpacked_bit | data_.size() | + (named_info_.empty() + ? 0ULL + : static_cast(detail::has_named_args_bit)); + } + + const basic_format_arg* data() const { + return named_info_.empty() ? 
data_.data() : data_.data() + 1; + } + + template void emplace_arg(const T& arg) { + data_.emplace_back(detail::make_arg(arg)); + } + + template + void emplace_arg(const detail::named_arg& arg) { + if (named_info_.empty()) { + constexpr const detail::named_arg_info* zero_ptr{nullptr}; + data_.insert(data_.begin(), {zero_ptr, 0}); + } + data_.emplace_back(detail::make_arg(detail::unwrap(arg.value))); + auto pop_one = [](std::vector>* data) { + data->pop_back(); + }; + std::unique_ptr>, decltype(pop_one)> + guard{&data_, pop_one}; + named_info_.push_back({arg.name, static_cast(data_.size() - 2u)}); + data_[0].value_.named_args = {named_info_.data(), named_info_.size()}; + guard.release(); + } + + public: + constexpr dynamic_format_arg_store() = default; + + /** + \rst + Adds an argument into the dynamic store for later passing to a formatting + function. + + Note that custom types and string types (but not string views) are copied + into the store dynamically allocating memory if necessary. + + **Example**:: + + fmt::dynamic_format_arg_store store; + store.push_back(42); + store.push_back("abc"); + store.push_back(1.5f); + std::string result = fmt::vformat("{} and {} and {}", store); + \endrst + */ + template void push_back(const T& arg) { + if (detail::const_check(need_copy::value)) + emplace_arg(dynamic_args_.push>(arg)); + else + emplace_arg(detail::unwrap(arg)); + } + + /** + \rst + Adds a reference to the argument into the dynamic store for later passing to + a formatting function. + + **Example**:: + + fmt::dynamic_format_arg_store store; + char band[] = "Rolling Stones"; + store.push_back(std::cref(band)); + band[9] = 'c'; // Changing str affects the output. 
+ std::string result = fmt::vformat("{}", store); + // result == "Rolling Scones" + \endrst + */ + template void push_back(std::reference_wrapper arg) { + static_assert( + need_copy::value, + "objects of built-in types and string views are always copied"); + emplace_arg(arg.get()); + } + + /** + Adds named argument into the dynamic store for later passing to a formatting + function. ``std::reference_wrapper`` is supported to avoid copying of the + argument. The name is always copied into the store. + */ + template + void push_back(const detail::named_arg& arg) { + const char_type* arg_name = + dynamic_args_.push>(arg.name).c_str(); + if (detail::const_check(need_copy::value)) { + emplace_arg( + fmt::arg(arg_name, dynamic_args_.push>(arg.value))); + } else { + emplace_arg(fmt::arg(arg_name, arg.value)); + } + } + + /** Erase all elements from the store */ + void clear() { + data_.clear(); + named_info_.clear(); + dynamic_args_ = detail::dynamic_arg_list(); + } + + /** + \rst + Reserves space to store at least *new_cap* arguments including + *new_cap_named* named arguments. + \endrst + */ + void reserve(size_t new_cap, size_t new_cap_named) { + FMT_ASSERT(new_cap >= new_cap_named, + "Set of arguments includes set of named arguments"); + data_.reserve(new_cap); + named_info_.reserve(new_cap_named); + } +}; + +FMT_END_NAMESPACE + +#endif // FMT_ARGS_H_ diff --git a/libkram/fmt/chrono.h b/libkram/fmt/chrono.h new file mode 100644 index 00000000..ed7f5f16 --- /dev/null +++ b/libkram/fmt/chrono.h @@ -0,0 +1,2069 @@ +// Formatting library for C++ - chrono support +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_CHRONO_H_ +#define FMT_CHRONO_H_ + +#include +#include +#include // std::isfinite +#include // std::memcpy +#include +#include +#include +#include +#include + +#include "format.h" + +FMT_BEGIN_NAMESPACE + +// Enable tzset. 
+#ifndef FMT_USE_TZSET +// UWP doesn't provide _tzset. +# if FMT_HAS_INCLUDE("winapifamily.h") +# include +# endif +# if defined(_WIN32) && (!defined(WINAPI_FAMILY) || \ + (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)) +# define FMT_USE_TZSET 1 +# else +# define FMT_USE_TZSET 0 +# endif +#endif + +// Enable safe chrono durations, unless explicitly disabled. +#ifndef FMT_SAFE_DURATION_CAST +# define FMT_SAFE_DURATION_CAST 1 +#endif +#if FMT_SAFE_DURATION_CAST + +// For conversion between std::chrono::durations without undefined +// behaviour or erroneous results. +// This is a stripped down version of duration_cast, for inclusion in fmt. +// See https://github.com/pauldreik/safe_duration_cast +// +// Copyright Paul Dreik 2019 +namespace safe_duration_cast { + +template ::value && + std::numeric_limits::is_signed == + std::numeric_limits::is_signed)> +FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { + ec = 0; + using F = std::numeric_limits; + using T = std::numeric_limits; + static_assert(F::is_integer, "From must be integral"); + static_assert(T::is_integer, "To must be integral"); + + // A and B are both signed, or both unsigned. + if (detail::const_check(F::digits <= T::digits)) { + // From fits in To without any problem. + } else { + // From does not always fit in To, resort to a dynamic check. + if (from < (T::min)() || from > (T::max)()) { + // outside range. + ec = 1; + return {}; + } + } + return static_cast(from); +} + +/** + * converts From to To, without loss. If the dynamic value of from + * can't be converted to To without loss, ec is set. 
+ */ +template ::value && + std::numeric_limits::is_signed != + std::numeric_limits::is_signed)> +FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { + ec = 0; + using F = std::numeric_limits; + using T = std::numeric_limits; + static_assert(F::is_integer, "From must be integral"); + static_assert(T::is_integer, "To must be integral"); + + if (detail::const_check(F::is_signed && !T::is_signed)) { + // From may be negative, not allowed! + if (fmt::detail::is_negative(from)) { + ec = 1; + return {}; + } + // From is positive. Can it always fit in To? + if (detail::const_check(F::digits > T::digits) && + from > static_cast(detail::max_value())) { + ec = 1; + return {}; + } + } + + if (detail::const_check(!F::is_signed && T::is_signed && + F::digits >= T::digits) && + from > static_cast(detail::max_value())) { + ec = 1; + return {}; + } + return static_cast(from); // Lossless conversion. +} + +template ::value)> +FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { + ec = 0; + return from; +} // function + +// clang-format off +/** + * converts From to To if possible, otherwise ec is set. + * + * input | output + * ---------------------------------|--------------- + * NaN | NaN + * Inf | Inf + * normal, fits in output | converted (possibly lossy) + * normal, does not fit in output | ec is set + * subnormal | best effort + * -Inf | -Inf + */ +// clang-format on +template ::value)> +FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { + ec = 0; + using T = std::numeric_limits; + static_assert(std::is_floating_point::value, "From must be floating"); + static_assert(std::is_floating_point::value, "To must be floating"); + + // catch the only happy case + if (std::isfinite(from)) { + if (from >= T::lowest() && from <= (T::max)()) { + return static_cast(from); + } + // not within range. 
+ ec = 1; + return {}; + } + + // nan and inf will be preserved + return static_cast(from); +} // function + +template ::value)> +FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { + ec = 0; + static_assert(std::is_floating_point::value, "From must be floating"); + return from; +} + +/** + * safe duration cast between integral durations + */ +template ::value), + FMT_ENABLE_IF(std::is_integral::value)> +To safe_duration_cast(std::chrono::duration from, + int& ec) { + using From = std::chrono::duration; + ec = 0; + // the basic idea is that we need to convert from count() in the from type + // to count() in the To type, by multiplying it with this: + struct Factor + : std::ratio_divide {}; + + static_assert(Factor::num > 0, "num must be positive"); + static_assert(Factor::den > 0, "den must be positive"); + + // the conversion is like this: multiply from.count() with Factor::num + // /Factor::den and convert it to To::rep, all this without + // overflow/underflow. let's start by finding a suitable type that can hold + // both To, From and Factor::num + using IntermediateRep = + typename std::common_type::type; + + // safe conversion to IntermediateRep + IntermediateRep count = + lossless_integral_conversion(from.count(), ec); + if (ec) return {}; + // multiply with Factor::num without overflow or underflow + if (detail::const_check(Factor::num != 1)) { + const auto max1 = detail::max_value() / Factor::num; + if (count > max1) { + ec = 1; + return {}; + } + const auto min1 = + (std::numeric_limits::min)() / Factor::num; + if (!std::is_unsigned::value && count < min1) { + ec = 1; + return {}; + } + count *= Factor::num; + } + + if (detail::const_check(Factor::den != 1)) count /= Factor::den; + auto tocount = lossless_integral_conversion(count, ec); + return ec ? 
To() : To(tocount); +} + +/** + * safe duration_cast between floating point durations + */ +template ::value), + FMT_ENABLE_IF(std::is_floating_point::value)> +To safe_duration_cast(std::chrono::duration from, + int& ec) { + using From = std::chrono::duration; + ec = 0; + if (std::isnan(from.count())) { + // nan in, gives nan out. easy. + return To{std::numeric_limits::quiet_NaN()}; + } + // maybe we should also check if from is denormal, and decide what to do about + // it. + + // +-inf should be preserved. + if (std::isinf(from.count())) { + return To{from.count()}; + } + + // the basic idea is that we need to convert from count() in the from type + // to count() in the To type, by multiplying it with this: + struct Factor + : std::ratio_divide {}; + + static_assert(Factor::num > 0, "num must be positive"); + static_assert(Factor::den > 0, "den must be positive"); + + // the conversion is like this: multiply from.count() with Factor::num + // /Factor::den and convert it to To::rep, all this without + // overflow/underflow. let's start by finding a suitable type that can hold + // both To, From and Factor::num + using IntermediateRep = + typename std::common_type::type; + + // force conversion of From::rep -> IntermediateRep to be safe, + // even if it will never happen be narrowing in this context. + IntermediateRep count = + safe_float_conversion(from.count(), ec); + if (ec) { + return {}; + } + + // multiply with Factor::num without overflow or underflow + if (detail::const_check(Factor::num != 1)) { + constexpr auto max1 = detail::max_value() / + static_cast(Factor::num); + if (count > max1) { + ec = 1; + return {}; + } + constexpr auto min1 = std::numeric_limits::lowest() / + static_cast(Factor::num); + if (count < min1) { + ec = 1; + return {}; + } + count *= static_cast(Factor::num); + } + + // this can't go wrong, right? den>0 is checked earlier. 
+ if (detail::const_check(Factor::den != 1)) { + using common_t = typename std::common_type::type; + count /= static_cast(Factor::den); + } + + // convert to the to type, safely + using ToRep = typename To::rep; + + const ToRep tocount = safe_float_conversion(count, ec); + if (ec) { + return {}; + } + return To{tocount}; +} +} // namespace safe_duration_cast +#endif + +// Prevents expansion of a preceding token as a function-style macro. +// Usage: f FMT_NOMACRO() +#define FMT_NOMACRO + +namespace detail { +template struct null {}; +inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } +inline null<> localtime_s(...) { return null<>(); } +inline null<> gmtime_r(...) { return null<>(); } +inline null<> gmtime_s(...) { return null<>(); } + +inline const std::locale& get_classic_locale() { + static const auto& locale = std::locale::classic(); + return locale; +} + +template struct codecvt_result { + static constexpr const size_t max_size = 32; + CodeUnit buf[max_size]; + CodeUnit* end; +}; +template +constexpr const size_t codecvt_result::max_size; + +template +void write_codecvt(codecvt_result& out, string_view in_buf, + const std::locale& loc) { +#if FMT_CLANG_VERSION +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wdeprecated" + auto& f = std::use_facet>(loc); +# pragma clang diagnostic pop +#else + auto& f = std::use_facet>(loc); +#endif + auto mb = std::mbstate_t(); + const char* from_next = nullptr; + auto result = f.in(mb, in_buf.begin(), in_buf.end(), from_next, + std::begin(out.buf), std::end(out.buf), out.end); + if (result != std::codecvt_base::ok) + FMT_THROW(format_error("failed to format time")); +} + +template +auto write_encoded_tm_str(OutputIt out, string_view in, const std::locale& loc) + -> OutputIt { + if (detail::is_utf8() && loc != get_classic_locale()) { + // char16_t and char32_t codecvts are broken in MSVC (linkage errors) and + // gcc-4. 
+#if FMT_MSC_VERSION != 0 || \ + (defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI)) + // The _GLIBCXX_USE_DUAL_ABI macro is always defined in libstdc++ from gcc-5 + // and newer. + using code_unit = wchar_t; +#else + using code_unit = char32_t; +#endif + + using unit_t = codecvt_result; + unit_t unit; + write_codecvt(unit, in, loc); + // In UTF-8 is used one to four one-byte code units. + auto&& buf = basic_memory_buffer(); + for (code_unit* p = unit.buf; p != unit.end; ++p) { + uint32_t c = static_cast(*p); + if (sizeof(code_unit) == 2 && c >= 0xd800 && c <= 0xdfff) { + // surrogate pair + ++p; + if (p == unit.end || (c & 0xfc00) != 0xd800 || + (*p & 0xfc00) != 0xdc00) { + FMT_THROW(format_error("failed to format time")); + } + c = (c << 10) + static_cast(*p) - 0x35fdc00; + } + if (c < 0x80) { + buf.push_back(static_cast(c)); + } else if (c < 0x800) { + buf.push_back(static_cast(0xc0 | (c >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { + buf.push_back(static_cast(0xe0 | (c >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if (c >= 0x10000 && c <= 0x10ffff) { + buf.push_back(static_cast(0xf0 | (c >> 18))); + buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else { + FMT_THROW(format_error("failed to format time")); + } + } + return copy_str(buf.data(), buf.data() + buf.size(), out); + } + return copy_str(in.data(), in.data() + in.size(), out); +} + +template ::value)> +auto write_tm_str(OutputIt out, string_view sv, const std::locale& loc) + -> OutputIt { + codecvt_result unit; + write_codecvt(unit, sv, loc); + return copy_str(unit.buf, unit.end, out); +} + +template ::value)> +auto write_tm_str(OutputIt out, string_view sv, const std::locale& loc) + -> OutputIt { + return 
write_encoded_tm_str(out, sv, loc); +} + +template +inline void do_write(buffer& buf, const std::tm& time, + const std::locale& loc, char format, char modifier) { + auto&& format_buf = formatbuf>(buf); + auto&& os = std::basic_ostream(&format_buf); + os.imbue(loc); + using iterator = std::ostreambuf_iterator; + const auto& facet = std::use_facet>(loc); + auto end = facet.put(os, os, Char(' '), &time, format, modifier); + if (end.failed()) FMT_THROW(format_error("failed to format time")); +} + +template ::value)> +auto write(OutputIt out, const std::tm& time, const std::locale& loc, + char format, char modifier = 0) -> OutputIt { + auto&& buf = get_buffer(out); + do_write(buf, time, loc, format, modifier); + return get_iterator(buf, out); +} + +template ::value)> +auto write(OutputIt out, const std::tm& time, const std::locale& loc, + char format, char modifier = 0) -> OutputIt { + auto&& buf = basic_memory_buffer(); + do_write(buf, time, loc, format, modifier); + return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); +} + +} // namespace detail + +FMT_MODULE_EXPORT_BEGIN + +/** + Converts given time since epoch as ``std::time_t`` value into calendar time, + expressed in local time. Unlike ``std::localtime``, this function is + thread-safe on most platforms. 
+ */ +inline std::tm localtime(std::time_t time) { + struct dispatcher { + std::time_t time_; + std::tm tm_; + + dispatcher(std::time_t t) : time_(t) {} + + bool run() { + using namespace fmt::detail; + return handle(localtime_r(&time_, &tm_)); + } + + bool handle(std::tm* tm) { return tm != nullptr; } + + bool handle(detail::null<>) { + using namespace fmt::detail; + return fallback(localtime_s(&tm_, &time_)); + } + + bool fallback(int res) { return res == 0; } + +#if !FMT_MSC_VERSION + bool fallback(detail::null<>) { + using namespace fmt::detail; + std::tm* tm = std::localtime(&time_); + if (tm) tm_ = *tm; + return tm != nullptr; + } +#endif + }; + dispatcher lt(time); + // Too big time values may be unsupported. + if (!lt.run()) FMT_THROW(format_error("time_t value out of range")); + return lt.tm_; +} + +inline std::tm localtime( + std::chrono::time_point time_point) { + return localtime(std::chrono::system_clock::to_time_t(time_point)); +} + +/** + Converts given time since epoch as ``std::time_t`` value into calendar time, + expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this + function is thread-safe on most platforms. + */ +inline std::tm gmtime(std::time_t time) { + struct dispatcher { + std::time_t time_; + std::tm tm_; + + dispatcher(std::time_t t) : time_(t) {} + + bool run() { + using namespace fmt::detail; + return handle(gmtime_r(&time_, &tm_)); + } + + bool handle(std::tm* tm) { return tm != nullptr; } + + bool handle(detail::null<>) { + using namespace fmt::detail; + return fallback(gmtime_s(&tm_, &time_)); + } + + bool fallback(int res) { return res == 0; } + +#if !FMT_MSC_VERSION + bool fallback(detail::null<>) { + std::tm* tm = std::gmtime(&time_); + if (tm) tm_ = *tm; + return tm != nullptr; + } +#endif + }; + dispatcher gt(time); + // Too big time values may be unsupported. 
+ if (!gt.run()) FMT_THROW(format_error("time_t value out of range")); + return gt.tm_; +} + +inline std::tm gmtime( + std::chrono::time_point time_point) { + return gmtime(std::chrono::system_clock::to_time_t(time_point)); +} + +FMT_BEGIN_DETAIL_NAMESPACE + +// Writes two-digit numbers a, b and c separated by sep to buf. +// The method by Pavel Novikov based on +// https://johnnylee-sde.github.io/Fast-unsigned-integer-to-time-string/. +inline void write_digit2_separated(char* buf, unsigned a, unsigned b, + unsigned c, char sep) { + unsigned long long digits = + a | (b << 24) | (static_cast(c) << 48); + // Convert each value to BCD. + // We have x = a * 10 + b and we want to convert it to BCD y = a * 16 + b. + // The difference is + // y - x = a * 6 + // a can be found from x: + // a = floor(x / 10) + // then + // y = x + a * 6 = x + floor(x / 10) * 6 + // floor(x / 10) is (x * 205) >> 11 (needs 16 bits). + digits += (((digits * 205) >> 11) & 0x000f00000f00000f) * 6; + // Put low nibbles to high bytes and high nibbles to low bytes. + digits = ((digits & 0x00f00000f00000f0) >> 4) | + ((digits & 0x000f00000f00000f) << 8); + auto usep = static_cast(sep); + // Add ASCII '0' to each digit byte and insert separators. 
+ digits |= 0x3030003030003030 | (usep << 16) | (usep << 40); + + constexpr const size_t len = 8; + if (const_check(is_big_endian())) { + char tmp[len]; + std::memcpy(tmp, &digits, len); + std::reverse_copy(tmp, tmp + len, buf); + } else { + std::memcpy(buf, &digits, len); + } +} + +template FMT_CONSTEXPR inline const char* get_units() { + if (std::is_same::value) return "as"; + if (std::is_same::value) return "fs"; + if (std::is_same::value) return "ps"; + if (std::is_same::value) return "ns"; + if (std::is_same::value) return "µs"; + if (std::is_same::value) return "ms"; + if (std::is_same::value) return "cs"; + if (std::is_same::value) return "ds"; + if (std::is_same>::value) return "s"; + if (std::is_same::value) return "das"; + if (std::is_same::value) return "hs"; + if (std::is_same::value) return "ks"; + if (std::is_same::value) return "Ms"; + if (std::is_same::value) return "Gs"; + if (std::is_same::value) return "Ts"; + if (std::is_same::value) return "Ps"; + if (std::is_same::value) return "Es"; + if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "h"; + return nullptr; +} + +enum class numeric_system { + standard, + // Alternative numeric system, e.g. 十二 instead of 12 in ja_JP locale. + alternative +}; + +// Parses a put_time-like format string and invokes handler actions. 
+template +FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, + const Char* end, + Handler&& handler) { + auto ptr = begin; + while (ptr != end) { + auto c = *ptr; + if (c == '}') break; + if (c != '%') { + ++ptr; + continue; + } + if (begin != ptr) handler.on_text(begin, ptr); + ++ptr; // consume '%' + if (ptr == end) FMT_THROW(format_error("invalid format")); + c = *ptr++; + switch (c) { + case '%': + handler.on_text(ptr - 1, ptr); + break; + case 'n': { + const Char newline[] = {'\n'}; + handler.on_text(newline, newline + 1); + break; + } + case 't': { + const Char tab[] = {'\t'}; + handler.on_text(tab, tab + 1); + break; + } + // Year: + case 'Y': + handler.on_year(numeric_system::standard); + break; + case 'y': + handler.on_short_year(numeric_system::standard); + break; + case 'C': + handler.on_century(numeric_system::standard); + break; + case 'G': + handler.on_iso_week_based_year(); + break; + case 'g': + handler.on_iso_week_based_short_year(); + break; + // Day of the week: + case 'a': + handler.on_abbr_weekday(); + break; + case 'A': + handler.on_full_weekday(); + break; + case 'w': + handler.on_dec0_weekday(numeric_system::standard); + break; + case 'u': + handler.on_dec1_weekday(numeric_system::standard); + break; + // Month: + case 'b': + case 'h': + handler.on_abbr_month(); + break; + case 'B': + handler.on_full_month(); + break; + case 'm': + handler.on_dec_month(numeric_system::standard); + break; + // Day of the year/month: + case 'U': + handler.on_dec0_week_of_year(numeric_system::standard); + break; + case 'W': + handler.on_dec1_week_of_year(numeric_system::standard); + break; + case 'V': + handler.on_iso_week_of_year(numeric_system::standard); + break; + case 'j': + handler.on_day_of_year(); + break; + case 'd': + handler.on_day_of_month(numeric_system::standard); + break; + case 'e': + handler.on_day_of_month_space(numeric_system::standard); + break; + // Hour, minute, second: + case 'H': + 
handler.on_24_hour(numeric_system::standard); + break; + case 'I': + handler.on_12_hour(numeric_system::standard); + break; + case 'M': + handler.on_minute(numeric_system::standard); + break; + case 'S': + handler.on_second(numeric_system::standard); + break; + // Other: + case 'c': + handler.on_datetime(numeric_system::standard); + break; + case 'x': + handler.on_loc_date(numeric_system::standard); + break; + case 'X': + handler.on_loc_time(numeric_system::standard); + break; + case 'D': + handler.on_us_date(); + break; + case 'F': + handler.on_iso_date(); + break; + case 'r': + handler.on_12_hour_time(); + break; + case 'R': + handler.on_24_hour_time(); + break; + case 'T': + handler.on_iso_time(); + break; + case 'p': + handler.on_am_pm(); + break; + case 'Q': + handler.on_duration_value(); + break; + case 'q': + handler.on_duration_unit(); + break; + case 'z': + handler.on_utc_offset(); + break; + case 'Z': + handler.on_tz_name(); + break; + // Alternative representation: + case 'E': { + if (ptr == end) FMT_THROW(format_error("invalid format")); + c = *ptr++; + switch (c) { + case 'Y': + handler.on_year(numeric_system::alternative); + break; + case 'y': + handler.on_offset_year(); + break; + case 'C': + handler.on_century(numeric_system::alternative); + break; + case 'c': + handler.on_datetime(numeric_system::alternative); + break; + case 'x': + handler.on_loc_date(numeric_system::alternative); + break; + case 'X': + handler.on_loc_time(numeric_system::alternative); + break; + default: + FMT_THROW(format_error("invalid format")); + } + break; + } + case 'O': + if (ptr == end) FMT_THROW(format_error("invalid format")); + c = *ptr++; + switch (c) { + case 'y': + handler.on_short_year(numeric_system::alternative); + break; + case 'm': + handler.on_dec_month(numeric_system::alternative); + break; + case 'U': + handler.on_dec0_week_of_year(numeric_system::alternative); + break; + case 'W': + handler.on_dec1_week_of_year(numeric_system::alternative); + break; + case 
'V': + handler.on_iso_week_of_year(numeric_system::alternative); + break; + case 'd': + handler.on_day_of_month(numeric_system::alternative); + break; + case 'e': + handler.on_day_of_month_space(numeric_system::alternative); + break; + case 'w': + handler.on_dec0_weekday(numeric_system::alternative); + break; + case 'u': + handler.on_dec1_weekday(numeric_system::alternative); + break; + case 'H': + handler.on_24_hour(numeric_system::alternative); + break; + case 'I': + handler.on_12_hour(numeric_system::alternative); + break; + case 'M': + handler.on_minute(numeric_system::alternative); + break; + case 'S': + handler.on_second(numeric_system::alternative); + break; + default: + FMT_THROW(format_error("invalid format")); + } + break; + default: + FMT_THROW(format_error("invalid format")); + } + begin = ptr; + } + if (begin != ptr) handler.on_text(begin, ptr); + return ptr; +} + +template struct null_chrono_spec_handler { + FMT_CONSTEXPR void unsupported() { + static_cast(this)->unsupported(); + } + FMT_CONSTEXPR void on_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_short_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_offset_year() { unsupported(); } + FMT_CONSTEXPR void on_century(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_iso_week_based_year() { unsupported(); } + FMT_CONSTEXPR void on_iso_week_based_short_year() { unsupported(); } + FMT_CONSTEXPR void on_abbr_weekday() { unsupported(); } + FMT_CONSTEXPR void on_full_weekday() { unsupported(); } + FMT_CONSTEXPR void on_dec0_weekday(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec1_weekday(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_abbr_month() { unsupported(); } + FMT_CONSTEXPR void on_full_month() { unsupported(); } + FMT_CONSTEXPR void on_dec_month(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) { unsupported(); } 
+ FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_day_of_year() { unsupported(); } + FMT_CONSTEXPR void on_day_of_month(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_day_of_month_space(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_24_hour(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_12_hour(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_minute(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_second(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_datetime(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_loc_date(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_loc_time(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_us_date() { unsupported(); } + FMT_CONSTEXPR void on_iso_date() { unsupported(); } + FMT_CONSTEXPR void on_12_hour_time() { unsupported(); } + FMT_CONSTEXPR void on_24_hour_time() { unsupported(); } + FMT_CONSTEXPR void on_iso_time() { unsupported(); } + FMT_CONSTEXPR void on_am_pm() { unsupported(); } + FMT_CONSTEXPR void on_duration_value() { unsupported(); } + FMT_CONSTEXPR void on_duration_unit() { unsupported(); } + FMT_CONSTEXPR void on_utc_offset() { unsupported(); } + FMT_CONSTEXPR void on_tz_name() { unsupported(); } +}; + +struct tm_format_checker : null_chrono_spec_handler { + FMT_NORETURN void unsupported() { FMT_THROW(format_error("no format")); } + + template + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_year(numeric_system) {} + FMT_CONSTEXPR void on_short_year(numeric_system) {} + FMT_CONSTEXPR void on_offset_year() {} + FMT_CONSTEXPR void on_century(numeric_system) {} + FMT_CONSTEXPR void on_iso_week_based_year() {} + FMT_CONSTEXPR void on_iso_week_based_short_year() {} + FMT_CONSTEXPR void on_abbr_weekday() {} + FMT_CONSTEXPR void on_full_weekday() {} + FMT_CONSTEXPR void on_dec0_weekday(numeric_system) {} + FMT_CONSTEXPR void 
on_dec1_weekday(numeric_system) {} + FMT_CONSTEXPR void on_abbr_month() {} + FMT_CONSTEXPR void on_full_month() {} + FMT_CONSTEXPR void on_dec_month(numeric_system) {} + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) {} + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) {} + FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) {} + FMT_CONSTEXPR void on_day_of_year() {} + FMT_CONSTEXPR void on_day_of_month(numeric_system) {} + FMT_CONSTEXPR void on_day_of_month_space(numeric_system) {} + FMT_CONSTEXPR void on_24_hour(numeric_system) {} + FMT_CONSTEXPR void on_12_hour(numeric_system) {} + FMT_CONSTEXPR void on_minute(numeric_system) {} + FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_datetime(numeric_system) {} + FMT_CONSTEXPR void on_loc_date(numeric_system) {} + FMT_CONSTEXPR void on_loc_time(numeric_system) {} + FMT_CONSTEXPR void on_us_date() {} + FMT_CONSTEXPR void on_iso_date() {} + FMT_CONSTEXPR void on_12_hour_time() {} + FMT_CONSTEXPR void on_24_hour_time() {} + FMT_CONSTEXPR void on_iso_time() {} + FMT_CONSTEXPR void on_am_pm() {} + FMT_CONSTEXPR void on_utc_offset() {} + FMT_CONSTEXPR void on_tz_name() {} +}; + +inline const char* tm_wday_full_name(int wday) { + static constexpr const char* full_name_list[] = { + "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday"}; + return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; +} +inline const char* tm_wday_short_name(int wday) { + static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", + "Thu", "Fri", "Sat"}; + return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; +} + +inline const char* tm_mon_full_name(int mon) { + static constexpr const char* full_name_list[] = { + "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", "December"}; + return mon >= 0 && mon <= 11 ? 
full_name_list[mon] : "?"; +} +inline const char* tm_mon_short_name(int mon) { + static constexpr const char* short_name_list[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + }; + return mon >= 0 && mon <= 11 ? short_name_list[mon] : "???"; +} + +template +struct has_member_data_tm_gmtoff : std::false_type {}; +template +struct has_member_data_tm_gmtoff> + : std::true_type {}; + +template +struct has_member_data_tm_zone : std::false_type {}; +template +struct has_member_data_tm_zone> + : std::true_type {}; + +#if FMT_USE_TZSET +inline void tzset_once() { + static bool init = []() -> bool { + _tzset(); + return true; + }(); + ignore_unused(init); +} +#endif + +template class tm_writer { + private: + static constexpr int days_per_week = 7; + + const std::locale& loc_; + const bool is_classic_; + OutputIt out_; + const std::tm& tm_; + + auto tm_sec() const noexcept -> int { + FMT_ASSERT(tm_.tm_sec >= 0 && tm_.tm_sec <= 61, ""); + return tm_.tm_sec; + } + auto tm_min() const noexcept -> int { + FMT_ASSERT(tm_.tm_min >= 0 && tm_.tm_min <= 59, ""); + return tm_.tm_min; + } + auto tm_hour() const noexcept -> int { + FMT_ASSERT(tm_.tm_hour >= 0 && tm_.tm_hour <= 23, ""); + return tm_.tm_hour; + } + auto tm_mday() const noexcept -> int { + FMT_ASSERT(tm_.tm_mday >= 1 && tm_.tm_mday <= 31, ""); + return tm_.tm_mday; + } + auto tm_mon() const noexcept -> int { + FMT_ASSERT(tm_.tm_mon >= 0 && tm_.tm_mon <= 11, ""); + return tm_.tm_mon; + } + auto tm_year() const noexcept -> long long { return 1900ll + tm_.tm_year; } + auto tm_wday() const noexcept -> int { + FMT_ASSERT(tm_.tm_wday >= 0 && tm_.tm_wday <= 6, ""); + return tm_.tm_wday; + } + auto tm_yday() const noexcept -> int { + FMT_ASSERT(tm_.tm_yday >= 0 && tm_.tm_yday <= 365, ""); + return tm_.tm_yday; + } + + auto tm_hour12() const noexcept -> int { + const auto h = tm_hour(); + const auto z = h < 12 ? h : h - 12; + return z == 0 ? 
12 : z; + } + + // POSIX and the C Standard are unclear or inconsistent about what %C and %y + // do if the year is negative or exceeds 9999. Use the convention that %C + // concatenated with %y yields the same output as %Y, and that %Y contains at + // least 4 characters, with more only if necessary. + auto split_year_lower(long long year) const noexcept -> int { + auto l = year % 100; + if (l < 0) l = -l; // l in [0, 99] + return static_cast(l); + } + + // Algorithm: + // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + auto iso_year_weeks(long long curr_year) const noexcept -> int { + const auto prev_year = curr_year - 1; + const auto curr_p = + (curr_year + curr_year / 4 - curr_year / 100 + curr_year / 400) % + days_per_week; + const auto prev_p = + (prev_year + prev_year / 4 - prev_year / 100 + prev_year / 400) % + days_per_week; + return 52 + ((curr_p == 4 || prev_p == 3) ? 1 : 0); + } + auto iso_week_num(int tm_yday, int tm_wday) const noexcept -> int { + return (tm_yday + 11 - (tm_wday == 0 ? days_per_week : tm_wday)) / + days_per_week; + } + auto tm_iso_week_year() const noexcept -> long long { + const auto year = tm_year(); + const auto w = iso_week_num(tm_yday(), tm_wday()); + if (w < 1) return year - 1; + if (w > iso_year_weeks(year)) return year + 1; + return year; + } + auto tm_iso_week_of_year() const noexcept -> int { + const auto year = tm_year(); + const auto w = iso_week_num(tm_yday(), tm_wday()); + if (w < 1) return iso_year_weeks(year - 1); + if (w > iso_year_weeks(year)) return 1; + return w; + } + + void write1(int value) { + *out_++ = static_cast('0' + to_unsigned(value) % 10); + } + void write2(int value) { + const char* d = digits2(to_unsigned(value) % 100); + *out_++ = *d++; + *out_++ = *d; + } + + void write_year_extended(long long year) { + // At least 4 characters. 
+ int width = 4; + if (year < 0) { + *out_++ = '-'; + year = 0 - year; + --width; + } + uint32_or_64_or_128_t n = to_unsigned(year); + const int num_digits = count_digits(n); + if (width > num_digits) out_ = std::fill_n(out_, width - num_digits, '0'); + out_ = format_decimal(out_, n, num_digits).end; + } + void write_year(long long year) { + if (year >= 0 && year < 10000) { + write2(static_cast(year / 100)); + write2(static_cast(year % 100)); + } else { + write_year_extended(year); + } + } + + void write_utc_offset(long offset) { + if (offset < 0) { + *out_++ = '-'; + offset = -offset; + } else { + *out_++ = '+'; + } + offset /= 60; + write2(static_cast(offset / 60)); + write2(static_cast(offset % 60)); + } + template ::value)> + void format_utc_offset_impl(const T& tm) { + write_utc_offset(tm.tm_gmtoff); + } + template ::value)> + void format_utc_offset_impl(const T& tm) { +#if defined(_WIN32) && defined(_UCRT) +# if FMT_USE_TZSET + tzset_once(); +# endif + long offset = 0; + _get_timezone(&offset); + if (tm.tm_isdst) { + long dstbias = 0; + _get_dstbias(&dstbias); + offset += dstbias; + } + write_utc_offset(-offset); +#else + ignore_unused(tm); + format_localized('z'); +#endif + } + + template ::value)> + void format_tz_name_impl(const T& tm) { + if (is_classic_) + out_ = write_tm_str(out_, tm.tm_zone, loc_); + else + format_localized('Z'); + } + template ::value)> + void format_tz_name_impl(const T&) { + format_localized('Z'); + } + + void format_localized(char format, char modifier = 0) { + out_ = write(out_, tm_, loc_, format, modifier); + } + + public: + tm_writer(const std::locale& loc, OutputIt out, const std::tm& tm) + : loc_(loc), + is_classic_(loc_ == get_classic_locale()), + out_(out), + tm_(tm) {} + + OutputIt out() const { return out_; } + + FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { + out_ = copy_str(begin, end, out_); + } + + void on_abbr_weekday() { + if (is_classic_) + out_ = write(out_, tm_wday_short_name(tm_wday())); + else 
+ format_localized('a'); + } + void on_full_weekday() { + if (is_classic_) + out_ = write(out_, tm_wday_full_name(tm_wday())); + else + format_localized('A'); + } + void on_dec0_weekday(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) return write1(tm_wday()); + format_localized('w', 'O'); + } + void on_dec1_weekday(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) { + auto wday = tm_wday(); + write1(wday == 0 ? days_per_week : wday); + } else { + format_localized('u', 'O'); + } + } + + void on_abbr_month() { + if (is_classic_) + out_ = write(out_, tm_mon_short_name(tm_mon())); + else + format_localized('b'); + } + void on_full_month() { + if (is_classic_) + out_ = write(out_, tm_mon_full_name(tm_mon())); + else + format_localized('B'); + } + + void on_datetime(numeric_system ns) { + if (is_classic_) { + on_abbr_weekday(); + *out_++ = ' '; + on_abbr_month(); + *out_++ = ' '; + on_day_of_month_space(numeric_system::standard); + *out_++ = ' '; + on_iso_time(); + *out_++ = ' '; + on_year(numeric_system::standard); + } else { + format_localized('c', ns == numeric_system::standard ? '\0' : 'E'); + } + } + void on_loc_date(numeric_system ns) { + if (is_classic_) + on_us_date(); + else + format_localized('x', ns == numeric_system::standard ? '\0' : 'E'); + } + void on_loc_time(numeric_system ns) { + if (is_classic_) + on_iso_time(); + else + format_localized('X', ns == numeric_system::standard ? 
'\0' : 'E'); + } + void on_us_date() { + char buf[8]; + write_digit2_separated(buf, to_unsigned(tm_mon() + 1), + to_unsigned(tm_mday()), + to_unsigned(split_year_lower(tm_year())), '/'); + out_ = copy_str(std::begin(buf), std::end(buf), out_); + } + void on_iso_date() { + auto year = tm_year(); + char buf[10]; + size_t offset = 0; + if (year >= 0 && year < 10000) { + copy2(buf, digits2(static_cast(year / 100))); + } else { + offset = 4; + write_year_extended(year); + year = 0; + } + write_digit2_separated(buf + 2, static_cast(year % 100), + to_unsigned(tm_mon() + 1), to_unsigned(tm_mday()), + '-'); + out_ = copy_str(std::begin(buf) + offset, std::end(buf), out_); + } + + void on_utc_offset() { format_utc_offset_impl(tm_); } + void on_tz_name() { format_tz_name_impl(tm_); } + + void on_year(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write_year(tm_year()); + format_localized('Y', 'E'); + } + void on_short_year(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write2(split_year_lower(tm_year())); + format_localized('y', 'O'); + } + void on_offset_year() { + if (is_classic_) return write2(split_year_lower(tm_year())); + format_localized('y', 'E'); + } + + void on_century(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) { + auto year = tm_year(); + auto upper = year / 100; + if (year >= -99 && year < 0) { + // Zero upper on negative year. 
+ *out_++ = '-'; + *out_++ = '0'; + } else if (upper >= 0 && upper < 100) { + write2(static_cast(upper)); + } else { + out_ = write(out_, upper); + } + } else { + format_localized('C', 'E'); + } + } + + void on_dec_month(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_mon() + 1); + format_localized('m', 'O'); + } + + void on_dec0_week_of_year(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write2((tm_yday() + days_per_week - tm_wday()) / days_per_week); + format_localized('U', 'O'); + } + void on_dec1_week_of_year(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) { + auto wday = tm_wday(); + write2((tm_yday() + days_per_week - + (wday == 0 ? (days_per_week - 1) : (wday - 1))) / + days_per_week); + } else { + format_localized('W', 'O'); + } + } + void on_iso_week_of_year(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_iso_week_of_year()); + format_localized('V', 'O'); + } + + void on_iso_week_based_year() { write_year(tm_iso_week_year()); } + void on_iso_week_based_short_year() { + write2(split_year_lower(tm_iso_week_year())); + } + + void on_day_of_year() { + auto yday = tm_yday() + 1; + write1(yday / 100); + write2(yday % 100); + } + void on_day_of_month(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) return write2(tm_mday()); + format_localized('d', 'O'); + } + void on_day_of_month_space(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) { + auto mday = to_unsigned(tm_mday()) % 100; + const char* d2 = digits2(mday); + *out_++ = mday < 10 ? 
' ' : d2[0]; + *out_++ = d2[1]; + } else { + format_localized('e', 'O'); + } + } + + void on_24_hour(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) return write2(tm_hour()); + format_localized('H', 'O'); + } + void on_12_hour(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_hour12()); + format_localized('I', 'O'); + } + void on_minute(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) return write2(tm_min()); + format_localized('M', 'O'); + } + void on_second(numeric_system ns) { + if (is_classic_ || ns == numeric_system::standard) return write2(tm_sec()); + format_localized('S', 'O'); + } + + void on_12_hour_time() { + if (is_classic_) { + char buf[8]; + write_digit2_separated(buf, to_unsigned(tm_hour12()), + to_unsigned(tm_min()), to_unsigned(tm_sec()), ':'); + out_ = copy_str(std::begin(buf), std::end(buf), out_); + *out_++ = ' '; + on_am_pm(); + } else { + format_localized('r'); + } + } + void on_24_hour_time() { + write2(tm_hour()); + *out_++ = ':'; + write2(tm_min()); + } + void on_iso_time() { + char buf[8]; + write_digit2_separated(buf, to_unsigned(tm_hour()), to_unsigned(tm_min()), + to_unsigned(tm_sec()), ':'); + out_ = copy_str(std::begin(buf), std::end(buf), out_); + } + + void on_am_pm() { + if (is_classic_) { + *out_++ = tm_hour() < 12 ? 'A' : 'P'; + *out_++ = 'M'; + } else { + format_localized('p'); + } + } + + // These apply to chrono durations but not tm. 
+ void on_duration_value() {} + void on_duration_unit() {} +}; + +struct chrono_format_checker : null_chrono_spec_handler { + FMT_NORETURN void unsupported() { FMT_THROW(format_error("no date")); } + + template + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + FMT_CONSTEXPR void on_24_hour(numeric_system) {} + FMT_CONSTEXPR void on_12_hour(numeric_system) {} + FMT_CONSTEXPR void on_minute(numeric_system) {} + FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_12_hour_time() {} + FMT_CONSTEXPR void on_24_hour_time() {} + FMT_CONSTEXPR void on_iso_time() {} + FMT_CONSTEXPR void on_am_pm() {} + FMT_CONSTEXPR void on_duration_value() {} + FMT_CONSTEXPR void on_duration_unit() {} +}; + +template ::value)> +inline bool isfinite(T) { + return true; +} + +// Converts value to Int and checks that it's in the range [0, upper). +template ::value)> +inline Int to_nonnegative_int(T value, Int upper) { + FMT_ASSERT(std::is_unsigned::value || + (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), + "invalid value"); + (void)upper; + return static_cast(value); +} +template ::value)> +inline Int to_nonnegative_int(T value, Int upper) { + if (value < 0 || value > static_cast(upper)) + FMT_THROW(format_error("invalid value")); + return static_cast(value); +} + +template ::value)> +inline T mod(T x, int y) { + return x % static_cast(y); +} +template ::value)> +inline T mod(T x, int y) { + return std::fmod(x, static_cast(y)); +} + +// If T is an integral type, maps T to its unsigned counterpart, otherwise +// leaves it unchanged (unlike std::make_unsigned). 
+template ::value> +struct make_unsigned_or_unchanged { + using type = T; +}; + +template struct make_unsigned_or_unchanged { + using type = typename std::make_unsigned::type; +}; + +#if FMT_SAFE_DURATION_CAST +// throwing version of safe_duration_cast +template +To fmt_safe_duration_cast(std::chrono::duration from) { + int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +} +#endif + +template ::value)> +inline std::chrono::duration get_milliseconds( + std::chrono::duration d) { + // this may overflow and/or the result may not fit in the + // target type. +#if FMT_SAFE_DURATION_CAST + using CommonSecondsType = + typename std::common_type::type; + const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_whole_seconds = + fmt_safe_duration_cast(d_as_common); + // this conversion should be nonproblematic + const auto diff = d_as_common - d_as_whole_seconds; + const auto ms = + fmt_safe_duration_cast>(diff); + return ms; +#else + auto s = std::chrono::duration_cast(d); + return std::chrono::duration_cast(d - s); +#endif +} + +// Counts the number of fractional digits in the range [0, 18] according to the +// C++20 spec. If more than 18 fractional digits are required then returns 6 for +// microseconds precision. +template () / 10)> +struct count_fractional_digits { + static constexpr int value = + Num % Den == 0 ? N : count_fractional_digits::value; +}; + +// Base case that doesn't instantiate any more templates +// in order to avoid overflow. +template +struct count_fractional_digits { + static constexpr int value = (Num % Den == 0) ? N : 6; +}; + +constexpr long long pow10(std::uint32_t n) { + return n == 0 ? 
1 : 10 * pow10(n - 1); +} + +template ::is_signed)> +constexpr std::chrono::duration abs( + std::chrono::duration d) { + // We need to compare the duration using the count() method directly + // due to a compiler bug in clang-11 regarding the spaceship operator, + // when -Wzero-as-null-pointer-constant is enabled. + // In clang-12 the bug has been fixed. See + // https://bugs.llvm.org/show_bug.cgi?id=46235 and the reproducible example: + // https://www.godbolt.org/z/Knbb5joYx. + return d.count() >= d.zero().count() ? d : -d; +} + +template ::is_signed)> +constexpr std::chrono::duration abs( + std::chrono::duration d) { + return d; +} + +template ::value)> +OutputIt format_duration_value(OutputIt out, Rep val, int) { + return write(out, val); +} + +template ::value)> +OutputIt format_duration_value(OutputIt out, Rep val, int precision) { + auto specs = basic_format_specs(); + specs.precision = precision; + specs.type = precision >= 0 ? presentation_type::fixed_lower + : presentation_type::general_lower; + return write(out, val, specs); +} + +template +OutputIt copy_unit(string_view unit, OutputIt out, Char) { + return std::copy(unit.begin(), unit.end(), out); +} + +template +OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { + // This works when wchar_t is UTF-32 because units only contain characters + // that have the same representation in UTF-16 and UTF-32. 
+ utf8_to_utf16 u(unit); + return std::copy(u.c_str(), u.c_str() + u.size(), out); +} + +template +OutputIt format_duration_unit(OutputIt out) { + if (const char* unit = get_units()) + return copy_unit(string_view(unit), out, Char()); + *out++ = '['; + out = write(out, Period::num); + if (const_check(Period::den != 1)) { + *out++ = '/'; + out = write(out, Period::den); + } + *out++ = ']'; + *out++ = 's'; + return out; +} + +class get_locale { + private: + union { + std::locale locale_; + }; + bool has_locale_ = false; + + public: + get_locale(bool localized, locale_ref loc) : has_locale_(localized) { + if (localized) + ::new (&locale_) std::locale(loc.template get()); + } + ~get_locale() { + if (has_locale_) locale_.~locale(); + } + operator const std::locale&() const { + return has_locale_ ? locale_ : get_classic_locale(); + } +}; + +template +struct chrono_formatter { + FormatContext& context; + OutputIt out; + int precision; + bool localized = false; + // rep is unsigned to avoid overflow. + using rep = + conditional_t::value && sizeof(Rep) < sizeof(int), + unsigned, typename make_unsigned_or_unchanged::type>; + rep val; + using seconds = std::chrono::duration; + seconds s; + using milliseconds = std::chrono::duration; + bool negative; + + using char_type = typename FormatContext::char_type; + using tm_writer_type = tm_writer; + + chrono_formatter(FormatContext& ctx, OutputIt o, + std::chrono::duration d) + : context(ctx), + out(o), + val(static_cast(d.count())), + negative(false) { + if (d.count() < 0) { + val = 0 - val; + negative = true; + } + + // this may overflow and/or the result may not fit in the + // target type. +#if FMT_SAFE_DURATION_CAST + // might need checked conversion (rep!=Rep) + auto tmpval = std::chrono::duration(val); + s = fmt_safe_duration_cast(tmpval); +#else + s = std::chrono::duration_cast( + std::chrono::duration(val)); +#endif + } + + // returns true if nan or inf, writes to out. 
+ bool handle_nan_inf() { + if (isfinite(val)) { + return false; + } + if (isnan(val)) { + write_nan(); + return true; + } + // must be +-inf + if (val > 0) { + write_pinf(); + } else { + write_ninf(); + } + return true; + } + + Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + + Rep hour12() const { + Rep hour = static_cast(mod((s.count() / 3600), 12)); + return hour <= 0 ? 12 : hour; + } + + Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } + Rep second() const { return static_cast(mod(s.count(), 60)); } + + std::tm time() const { + auto time = std::tm(); + time.tm_hour = to_nonnegative_int(hour(), 24); + time.tm_min = to_nonnegative_int(minute(), 60); + time.tm_sec = to_nonnegative_int(second(), 60); + return time; + } + + void write_sign() { + if (negative) { + *out++ = '-'; + negative = false; + } + } + + void write(Rep value, int width) { + write_sign(); + if (isnan(value)) return write_nan(); + uint32_or_64_or_128_t n = + to_unsigned(to_nonnegative_int(value, max_value())); + int num_digits = detail::count_digits(n); + if (width > num_digits) out = std::fill_n(out, width - num_digits, '0'); + out = format_decimal(out, n, num_digits).end; + } + + template void write_fractional_seconds(Duration d) { + FMT_ASSERT(!std::is_floating_point::value, ""); + constexpr auto num_fractional_digits = + count_fractional_digits::value; + + using subsecond_precision = std::chrono::duration< + typename std::common_type::type, + std::ratio<1, detail::pow10(num_fractional_digits)>>; + if (std::ratio_less::value) { + *out++ = '.'; + auto fractional = + detail::abs(d) - std::chrono::duration_cast(d); + auto subseconds = + std::chrono::treat_as_floating_point< + typename subsecond_precision::rep>::value + ? 
fractional.count() + : std::chrono::duration_cast(fractional) + .count(); + uint32_or_64_or_128_t n = + to_unsigned(to_nonnegative_int(subseconds, max_value())); + int num_digits = detail::count_digits(n); + if (num_fractional_digits > num_digits) + out = std::fill_n(out, num_fractional_digits - num_digits, '0'); + out = format_decimal(out, n, num_digits).end; + } + } + + void write_nan() { std::copy_n("nan", 3, out); } + void write_pinf() { std::copy_n("inf", 3, out); } + void write_ninf() { std::copy_n("-inf", 4, out); } + + template + void format_tm(const tm& time, Callback cb, Args... args) { + if (isnan(val)) return write_nan(); + get_locale loc(localized, context.locale()); + auto w = tm_writer_type(loc, out, time); + (w.*cb)(args...); + out = w.out(); + } + + void on_text(const char_type* begin, const char_type* end) { + std::copy(begin, end, out); + } + + // These are not implemented because durations don't have date information. + void on_abbr_weekday() {} + void on_full_weekday() {} + void on_dec0_weekday(numeric_system) {} + void on_dec1_weekday(numeric_system) {} + void on_abbr_month() {} + void on_full_month() {} + void on_datetime(numeric_system) {} + void on_loc_date(numeric_system) {} + void on_loc_time(numeric_system) {} + void on_us_date() {} + void on_iso_date() {} + void on_utc_offset() {} + void on_tz_name() {} + void on_year(numeric_system) {} + void on_short_year(numeric_system) {} + void on_offset_year() {} + void on_century(numeric_system) {} + void on_iso_week_based_year() {} + void on_iso_week_based_short_year() {} + void on_dec_month(numeric_system) {} + void on_dec0_week_of_year(numeric_system) {} + void on_dec1_week_of_year(numeric_system) {} + void on_iso_week_of_year(numeric_system) {} + void on_day_of_year() {} + void on_day_of_month(numeric_system) {} + void on_day_of_month_space(numeric_system) {} + + void on_24_hour(numeric_system ns) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) return write(hour(), 
2); + auto time = tm(); + time.tm_hour = to_nonnegative_int(hour(), 24); + format_tm(time, &tm_writer_type::on_24_hour, ns); + } + + void on_12_hour(numeric_system ns) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) return write(hour12(), 2); + auto time = tm(); + time.tm_hour = to_nonnegative_int(hour12(), 12); + format_tm(time, &tm_writer_type::on_12_hour, ns); + } + + void on_minute(numeric_system ns) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) return write(minute(), 2); + auto time = tm(); + time.tm_min = to_nonnegative_int(minute(), 60); + format_tm(time, &tm_writer_type::on_minute, ns); + } + + void on_second(numeric_system ns) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) { + if (std::is_floating_point::value) { + constexpr auto num_fractional_digits = + count_fractional_digits::value; + auto buf = memory_buffer(); + format_to(std::back_inserter(buf), runtime("{:.{}f}"), + std::fmod(val * static_cast(Period::num) / + static_cast(Period::den), + static_cast(60)), + num_fractional_digits); + if (negative) *out++ = '-'; + if (buf.size() < 2 || buf[1] == '.') *out++ = '0'; + out = std::copy(buf.begin(), buf.end(), out); + } else { + write(second(), 2); + write_fractional_seconds(std::chrono::duration(val)); + } + return; + } + auto time = tm(); + time.tm_sec = to_nonnegative_int(second(), 60); + format_tm(time, &tm_writer_type::on_second, ns); + } + + void on_12_hour_time() { + if (handle_nan_inf()) return; + format_tm(time(), &tm_writer_type::on_12_hour_time); + } + + void on_24_hour_time() { + if (handle_nan_inf()) { + *out++ = ':'; + handle_nan_inf(); + return; + } + + write(hour(), 2); + *out++ = ':'; + write(minute(), 2); + } + + void on_iso_time() { + on_24_hour_time(); + *out++ = ':'; + if (handle_nan_inf()) return; + on_second(numeric_system::standard); + } + + void on_am_pm() { + if (handle_nan_inf()) return; + format_tm(time(), &tm_writer_type::on_am_pm); + } + + void 
on_duration_value() { + if (handle_nan_inf()) return; + write_sign(); + out = format_duration_value(out, val, precision); + } + + void on_duration_unit() { + out = format_duration_unit(out); + } +}; + +FMT_END_DETAIL_NAMESPACE + +#if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907 +using weekday = std::chrono::weekday; +#else +// A fallback version of weekday. +class weekday { + private: + unsigned char value; + + public: + weekday() = default; + explicit constexpr weekday(unsigned wd) noexcept + : value(static_cast(wd != 7 ? wd : 0)) {} + constexpr unsigned c_encoding() const noexcept { return value; } +}; + +class year_month_day {}; +#endif + +// A rudimentary weekday formatter. +template struct formatter { + private: + bool localized = false; + + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto begin = ctx.begin(), end = ctx.end(); + if (begin != end && *begin == 'L') { + ++begin; + localized = true; + } + return begin; + } + + template + auto format(weekday wd, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_wday = static_cast(wd.c_encoding()); + detail::get_locale loc(localized, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_abbr_weekday(); + return w.out(); + } +}; + +template +struct formatter, Char> { + private: + basic_format_specs specs; + int precision = -1; + using arg_ref_type = detail::arg_ref; + arg_ref_type width_ref; + arg_ref_type precision_ref; + bool localized = false; + basic_string_view format_str; + using duration = std::chrono::duration; + + struct spec_handler { + formatter& f; + basic_format_parse_context& context; + basic_string_view format_str; + + template FMT_CONSTEXPR arg_ref_type make_arg_ref(Id arg_id) { + context.check_arg_id(arg_id); + return arg_ref_type(arg_id); + } + + FMT_CONSTEXPR arg_ref_type make_arg_ref(basic_string_view arg_id) { + context.check_arg_id(arg_id); + return arg_ref_type(arg_id); + } 
+ + FMT_CONSTEXPR arg_ref_type make_arg_ref(detail::auto_id) { + return arg_ref_type(context.next_arg_id()); + } + + void on_error(const char* msg) { FMT_THROW(format_error(msg)); } + FMT_CONSTEXPR void on_fill(basic_string_view fill) { + f.specs.fill = fill; + } + FMT_CONSTEXPR void on_align(align_t align) { f.specs.align = align; } + FMT_CONSTEXPR void on_width(int width) { f.specs.width = width; } + FMT_CONSTEXPR void on_precision(int _precision) { + f.precision = _precision; + } + FMT_CONSTEXPR void end_precision() {} + + template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { + f.width_ref = make_arg_ref(arg_id); + } + + template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { + f.precision_ref = make_arg_ref(arg_id); + } + }; + + using iterator = typename basic_format_parse_context::iterator; + struct parse_range { + iterator begin; + iterator end; + }; + + FMT_CONSTEXPR parse_range do_parse(basic_format_parse_context& ctx) { + auto begin = ctx.begin(), end = ctx.end(); + if (begin == end || *begin == '}') return {begin, begin}; + spec_handler handler{*this, ctx, format_str}; + begin = detail::parse_align(begin, end, handler); + if (begin == end) return {begin, begin}; + begin = detail::parse_width(begin, end, handler); + if (begin == end) return {begin, begin}; + if (*begin == '.') { + if (std::is_floating_point::value) + begin = detail::parse_precision(begin, end, handler); + else + handler.on_error("precision not allowed for this argument type"); + } + if (begin != end && *begin == 'L') { + ++begin; + localized = true; + } + end = detail::parse_chrono_format(begin, end, + detail::chrono_format_checker()); + return {begin, end}; + } + + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto range = do_parse(ctx); + format_str = basic_string_view( + &*range.begin, detail::to_unsigned(range.end - range.begin)); + return range.end; + } + + template + auto format(const duration& d, FormatContext& ctx) 
const + -> decltype(ctx.out()) { + auto specs_copy = specs; + auto precision_copy = precision; + auto begin = format_str.begin(), end = format_str.end(); + // As a possible future optimization, we could avoid extra copying if width + // is not specified. + basic_memory_buffer buf; + auto out = std::back_inserter(buf); + detail::handle_dynamic_spec(specs_copy.width, + width_ref, ctx); + detail::handle_dynamic_spec(precision_copy, + precision_ref, ctx); + if (begin == end || *begin == '}') { + out = detail::format_duration_value(out, d.count(), precision_copy); + detail::format_duration_unit(out); + } else { + detail::chrono_formatter f( + ctx, out, d); + f.precision = precision_copy; + f.localized = localized; + detail::parse_chrono_format(begin, end, f); + } + return detail::write( + ctx.out(), basic_string_view(buf.data(), buf.size()), specs_copy); + } +}; + +template +struct formatter, + Char> : formatter { + FMT_CONSTEXPR formatter() { + basic_string_view default_specs = + detail::string_literal{}; + this->do_parse(default_specs.begin(), default_specs.end()); + } + + template + auto format(std::chrono::time_point val, + FormatContext& ctx) const -> decltype(ctx.out()) { + return formatter::format(localtime(val), ctx); + } +}; + +template struct formatter { + private: + enum class spec { + unknown, + year_month_day, + hh_mm_ss, + }; + spec spec_ = spec::unknown; + basic_string_view specs; + + protected: + template FMT_CONSTEXPR auto do_parse(It begin, It end) -> It { + if (begin != end && *begin == ':') ++begin; + end = detail::parse_chrono_format(begin, end, detail::tm_format_checker()); + // Replace default spec only if the new spec is not empty. + if (end != begin) specs = {begin, detail::to_unsigned(end - begin)}; + return end; + } + + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto end = this->do_parse(ctx.begin(), ctx.end()); + // basic_string_view<>::compare isn't constexpr before C++17. 
+ if (specs.size() == 2 && specs[0] == Char('%')) { + if (specs[1] == Char('F')) + spec_ = spec::year_month_day; + else if (specs[1] == Char('T')) + spec_ = spec::hh_mm_ss; + } + return end; + } + + template + auto format(const std::tm& tm, FormatContext& ctx) const + -> decltype(ctx.out()) { + const auto loc_ref = ctx.locale(); + detail::get_locale loc(static_cast(loc_ref), loc_ref); + auto w = detail::tm_writer(loc, ctx.out(), tm); + if (spec_ == spec::year_month_day) + w.on_iso_date(); + else if (spec_ == spec::hh_mm_ss) + w.on_iso_time(); + else + detail::parse_chrono_format(specs.begin(), specs.end(), w); + return w.out(); + } +}; + +FMT_MODULE_EXPORT_END +FMT_END_NAMESPACE + +#endif // FMT_CHRONO_H_ diff --git a/libkram/fmt/color.h b/libkram/fmt/color.h new file mode 100644 index 00000000..06b90ba1 --- /dev/null +++ b/libkram/fmt/color.h @@ -0,0 +1,651 @@ +// Formatting library for C++ - color support +// +// Copyright (c) 2018 - present, Victor Zverovich and fmt contributors +// All rights reserved. +// +// For the license information refer to format.h. 
+ +#ifndef FMT_COLOR_H_ +#define FMT_COLOR_H_ + +#include "format.h" + +FMT_BEGIN_NAMESPACE +FMT_MODULE_EXPORT_BEGIN + +enum class color : uint32_t { + alice_blue = 0xF0F8FF, // rgb(240,248,255) + antique_white = 0xFAEBD7, // rgb(250,235,215) + aqua = 0x00FFFF, // rgb(0,255,255) + aquamarine = 0x7FFFD4, // rgb(127,255,212) + azure = 0xF0FFFF, // rgb(240,255,255) + beige = 0xF5F5DC, // rgb(245,245,220) + bisque = 0xFFE4C4, // rgb(255,228,196) + black = 0x000000, // rgb(0,0,0) + blanched_almond = 0xFFEBCD, // rgb(255,235,205) + blue = 0x0000FF, // rgb(0,0,255) + blue_violet = 0x8A2BE2, // rgb(138,43,226) + brown = 0xA52A2A, // rgb(165,42,42) + burly_wood = 0xDEB887, // rgb(222,184,135) + cadet_blue = 0x5F9EA0, // rgb(95,158,160) + chartreuse = 0x7FFF00, // rgb(127,255,0) + chocolate = 0xD2691E, // rgb(210,105,30) + coral = 0xFF7F50, // rgb(255,127,80) + cornflower_blue = 0x6495ED, // rgb(100,149,237) + cornsilk = 0xFFF8DC, // rgb(255,248,220) + crimson = 0xDC143C, // rgb(220,20,60) + cyan = 0x00FFFF, // rgb(0,255,255) + dark_blue = 0x00008B, // rgb(0,0,139) + dark_cyan = 0x008B8B, // rgb(0,139,139) + dark_golden_rod = 0xB8860B, // rgb(184,134,11) + dark_gray = 0xA9A9A9, // rgb(169,169,169) + dark_green = 0x006400, // rgb(0,100,0) + dark_khaki = 0xBDB76B, // rgb(189,183,107) + dark_magenta = 0x8B008B, // rgb(139,0,139) + dark_olive_green = 0x556B2F, // rgb(85,107,47) + dark_orange = 0xFF8C00, // rgb(255,140,0) + dark_orchid = 0x9932CC, // rgb(153,50,204) + dark_red = 0x8B0000, // rgb(139,0,0) + dark_salmon = 0xE9967A, // rgb(233,150,122) + dark_sea_green = 0x8FBC8F, // rgb(143,188,143) + dark_slate_blue = 0x483D8B, // rgb(72,61,139) + dark_slate_gray = 0x2F4F4F, // rgb(47,79,79) + dark_turquoise = 0x00CED1, // rgb(0,206,209) + dark_violet = 0x9400D3, // rgb(148,0,211) + deep_pink = 0xFF1493, // rgb(255,20,147) + deep_sky_blue = 0x00BFFF, // rgb(0,191,255) + dim_gray = 0x696969, // rgb(105,105,105) + dodger_blue = 0x1E90FF, // rgb(30,144,255) + fire_brick = 0xB22222, 
// rgb(178,34,34) + floral_white = 0xFFFAF0, // rgb(255,250,240) + forest_green = 0x228B22, // rgb(34,139,34) + fuchsia = 0xFF00FF, // rgb(255,0,255) + gainsboro = 0xDCDCDC, // rgb(220,220,220) + ghost_white = 0xF8F8FF, // rgb(248,248,255) + gold = 0xFFD700, // rgb(255,215,0) + golden_rod = 0xDAA520, // rgb(218,165,32) + gray = 0x808080, // rgb(128,128,128) + green = 0x008000, // rgb(0,128,0) + green_yellow = 0xADFF2F, // rgb(173,255,47) + honey_dew = 0xF0FFF0, // rgb(240,255,240) + hot_pink = 0xFF69B4, // rgb(255,105,180) + indian_red = 0xCD5C5C, // rgb(205,92,92) + indigo = 0x4B0082, // rgb(75,0,130) + ivory = 0xFFFFF0, // rgb(255,255,240) + khaki = 0xF0E68C, // rgb(240,230,140) + lavender = 0xE6E6FA, // rgb(230,230,250) + lavender_blush = 0xFFF0F5, // rgb(255,240,245) + lawn_green = 0x7CFC00, // rgb(124,252,0) + lemon_chiffon = 0xFFFACD, // rgb(255,250,205) + light_blue = 0xADD8E6, // rgb(173,216,230) + light_coral = 0xF08080, // rgb(240,128,128) + light_cyan = 0xE0FFFF, // rgb(224,255,255) + light_golden_rod_yellow = 0xFAFAD2, // rgb(250,250,210) + light_gray = 0xD3D3D3, // rgb(211,211,211) + light_green = 0x90EE90, // rgb(144,238,144) + light_pink = 0xFFB6C1, // rgb(255,182,193) + light_salmon = 0xFFA07A, // rgb(255,160,122) + light_sea_green = 0x20B2AA, // rgb(32,178,170) + light_sky_blue = 0x87CEFA, // rgb(135,206,250) + light_slate_gray = 0x778899, // rgb(119,136,153) + light_steel_blue = 0xB0C4DE, // rgb(176,196,222) + light_yellow = 0xFFFFE0, // rgb(255,255,224) + lime = 0x00FF00, // rgb(0,255,0) + lime_green = 0x32CD32, // rgb(50,205,50) + linen = 0xFAF0E6, // rgb(250,240,230) + magenta = 0xFF00FF, // rgb(255,0,255) + maroon = 0x800000, // rgb(128,0,0) + medium_aquamarine = 0x66CDAA, // rgb(102,205,170) + medium_blue = 0x0000CD, // rgb(0,0,205) + medium_orchid = 0xBA55D3, // rgb(186,85,211) + medium_purple = 0x9370DB, // rgb(147,112,219) + medium_sea_green = 0x3CB371, // rgb(60,179,113) + medium_slate_blue = 0x7B68EE, // rgb(123,104,238) + 
medium_spring_green = 0x00FA9A, // rgb(0,250,154) + medium_turquoise = 0x48D1CC, // rgb(72,209,204) + medium_violet_red = 0xC71585, // rgb(199,21,133) + midnight_blue = 0x191970, // rgb(25,25,112) + mint_cream = 0xF5FFFA, // rgb(245,255,250) + misty_rose = 0xFFE4E1, // rgb(255,228,225) + moccasin = 0xFFE4B5, // rgb(255,228,181) + navajo_white = 0xFFDEAD, // rgb(255,222,173) + navy = 0x000080, // rgb(0,0,128) + old_lace = 0xFDF5E6, // rgb(253,245,230) + olive = 0x808000, // rgb(128,128,0) + olive_drab = 0x6B8E23, // rgb(107,142,35) + orange = 0xFFA500, // rgb(255,165,0) + orange_red = 0xFF4500, // rgb(255,69,0) + orchid = 0xDA70D6, // rgb(218,112,214) + pale_golden_rod = 0xEEE8AA, // rgb(238,232,170) + pale_green = 0x98FB98, // rgb(152,251,152) + pale_turquoise = 0xAFEEEE, // rgb(175,238,238) + pale_violet_red = 0xDB7093, // rgb(219,112,147) + papaya_whip = 0xFFEFD5, // rgb(255,239,213) + peach_puff = 0xFFDAB9, // rgb(255,218,185) + peru = 0xCD853F, // rgb(205,133,63) + pink = 0xFFC0CB, // rgb(255,192,203) + plum = 0xDDA0DD, // rgb(221,160,221) + powder_blue = 0xB0E0E6, // rgb(176,224,230) + purple = 0x800080, // rgb(128,0,128) + rebecca_purple = 0x663399, // rgb(102,51,153) + red = 0xFF0000, // rgb(255,0,0) + rosy_brown = 0xBC8F8F, // rgb(188,143,143) + royal_blue = 0x4169E1, // rgb(65,105,225) + saddle_brown = 0x8B4513, // rgb(139,69,19) + salmon = 0xFA8072, // rgb(250,128,114) + sandy_brown = 0xF4A460, // rgb(244,164,96) + sea_green = 0x2E8B57, // rgb(46,139,87) + sea_shell = 0xFFF5EE, // rgb(255,245,238) + sienna = 0xA0522D, // rgb(160,82,45) + silver = 0xC0C0C0, // rgb(192,192,192) + sky_blue = 0x87CEEB, // rgb(135,206,235) + slate_blue = 0x6A5ACD, // rgb(106,90,205) + slate_gray = 0x708090, // rgb(112,128,144) + snow = 0xFFFAFA, // rgb(255,250,250) + spring_green = 0x00FF7F, // rgb(0,255,127) + steel_blue = 0x4682B4, // rgb(70,130,180) + tan = 0xD2B48C, // rgb(210,180,140) + teal = 0x008080, // rgb(0,128,128) + thistle = 0xD8BFD8, // rgb(216,191,216) + tomato 
= 0xFF6347, // rgb(255,99,71) + turquoise = 0x40E0D0, // rgb(64,224,208) + violet = 0xEE82EE, // rgb(238,130,238) + wheat = 0xF5DEB3, // rgb(245,222,179) + white = 0xFFFFFF, // rgb(255,255,255) + white_smoke = 0xF5F5F5, // rgb(245,245,245) + yellow = 0xFFFF00, // rgb(255,255,0) + yellow_green = 0x9ACD32 // rgb(154,205,50) +}; // enum class color + +enum class terminal_color : uint8_t { + black = 30, + red, + green, + yellow, + blue, + magenta, + cyan, + white, + bright_black = 90, + bright_red, + bright_green, + bright_yellow, + bright_blue, + bright_magenta, + bright_cyan, + bright_white +}; + +enum class emphasis : uint8_t { + bold = 1, + faint = 1 << 1, + italic = 1 << 2, + underline = 1 << 3, + blink = 1 << 4, + reverse = 1 << 5, + conceal = 1 << 6, + strikethrough = 1 << 7, +}; + +// rgb is a struct for red, green and blue colors. +// Using the name "rgb" makes some editors show the color in a tooltip. +struct rgb { + FMT_CONSTEXPR rgb() : r(0), g(0), b(0) {} + FMT_CONSTEXPR rgb(uint8_t r_, uint8_t g_, uint8_t b_) : r(r_), g(g_), b(b_) {} + FMT_CONSTEXPR rgb(uint32_t hex) + : r((hex >> 16) & 0xFF), g((hex >> 8) & 0xFF), b(hex & 0xFF) {} + FMT_CONSTEXPR rgb(color hex) + : r((uint32_t(hex) >> 16) & 0xFF), + g((uint32_t(hex) >> 8) & 0xFF), + b(uint32_t(hex) & 0xFF) {} + uint8_t r; + uint8_t g; + uint8_t b; +}; + +FMT_BEGIN_DETAIL_NAMESPACE + +// color is a struct of either a rgb color or a terminal color. 
+struct color_type { + FMT_CONSTEXPR color_type() noexcept : is_rgb(), value{} {} + FMT_CONSTEXPR color_type(color rgb_color) noexcept : is_rgb(true), value{} { + value.rgb_color = static_cast(rgb_color); + } + FMT_CONSTEXPR color_type(rgb rgb_color) noexcept : is_rgb(true), value{} { + value.rgb_color = (static_cast(rgb_color.r) << 16) | + (static_cast(rgb_color.g) << 8) | rgb_color.b; + } + FMT_CONSTEXPR color_type(terminal_color term_color) noexcept + : is_rgb(), value{} { + value.term_color = static_cast(term_color); + } + bool is_rgb; + union color_union { + uint8_t term_color; + uint32_t rgb_color; + } value; +}; + +FMT_END_DETAIL_NAMESPACE + +/** A text style consisting of foreground and background colors and emphasis. */ +class text_style { + public: + FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept + : set_foreground_color(), set_background_color(), ems(em) {} + + FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + if (!set_foreground_color) { + set_foreground_color = rhs.set_foreground_color; + foreground_color = rhs.foreground_color; + } else if (rhs.set_foreground_color) { + if (!foreground_color.is_rgb || !rhs.foreground_color.is_rgb) + FMT_THROW(format_error("can't OR a terminal color")); + foreground_color.value.rgb_color |= rhs.foreground_color.value.rgb_color; + } + + if (!set_background_color) { + set_background_color = rhs.set_background_color; + background_color = rhs.background_color; + } else if (rhs.set_background_color) { + if (!background_color.is_rgb || !rhs.background_color.is_rgb) + FMT_THROW(format_error("can't OR a terminal color")); + background_color.value.rgb_color |= rhs.background_color.value.rgb_color; + } + + ems = static_cast(static_cast(ems) | + static_cast(rhs.ems)); + return *this; + } + + friend FMT_CONSTEXPR text_style operator|(text_style lhs, + const text_style& rhs) { + return lhs |= rhs; + } + + FMT_CONSTEXPR bool has_foreground() const noexcept { + return set_foreground_color; + } + 
FMT_CONSTEXPR bool has_background() const noexcept { + return set_background_color; + } + FMT_CONSTEXPR bool has_emphasis() const noexcept { + return static_cast(ems) != 0; + } + FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_ASSERT(has_foreground(), "no foreground specified for this style"); + return foreground_color; + } + FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_ASSERT(has_background(), "no background specified for this style"); + return background_color; + } + FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); + return ems; + } + + private: + FMT_CONSTEXPR text_style(bool is_foreground, + detail::color_type text_color) noexcept + : set_foreground_color(), set_background_color(), ems() { + if (is_foreground) { + foreground_color = text_color; + set_foreground_color = true; + } else { + background_color = text_color; + set_background_color = true; + } + } + + friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + + friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; + + detail::color_type foreground_color; + detail::color_type background_color; + bool set_foreground_color; + bool set_background_color; + emphasis ems; +}; + +/** Creates a text style from the foreground (text) color. */ +FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { + return text_style(true, foreground); +} + +/** Creates a text style from the background color. 
*/ +FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { + return text_style(false, background); +} + +FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { + return text_style(lhs) | rhs; +} + +FMT_BEGIN_DETAIL_NAMESPACE + +template struct ansi_color_escape { + FMT_CONSTEXPR ansi_color_escape(detail::color_type text_color, + const char* esc) noexcept { + // If we have a terminal color, we need to output another escape code + // sequence. + if (!text_color.is_rgb) { + bool is_background = esc == string_view("\x1b[48;2;"); + uint32_t value = text_color.value.term_color; + // Background ASCII codes are the same as the foreground ones but with + // 10 more. + if (is_background) value += 10u; + + size_t index = 0; + buffer[index++] = static_cast('\x1b'); + buffer[index++] = static_cast('['); + + if (value >= 100u) { + buffer[index++] = static_cast('1'); + value %= 100u; + } + buffer[index++] = static_cast('0' + value / 10u); + buffer[index++] = static_cast('0' + value % 10u); + + buffer[index++] = static_cast('m'); + buffer[index++] = static_cast('\0'); + return; + } + + for (int i = 0; i < 7; i++) { + buffer[i] = static_cast(esc[i]); + } + rgb color(text_color.value.rgb_color); + to_esc(color.r, buffer + 7, ';'); + to_esc(color.g, buffer + 11, ';'); + to_esc(color.b, buffer + 15, 'm'); + buffer[19] = static_cast(0); + } + FMT_CONSTEXPR ansi_color_escape(emphasis em) noexcept { + uint8_t em_codes[num_emphases] = {}; + if (has_emphasis(em, emphasis::bold)) em_codes[0] = 1; + if (has_emphasis(em, emphasis::faint)) em_codes[1] = 2; + if (has_emphasis(em, emphasis::italic)) em_codes[2] = 3; + if (has_emphasis(em, emphasis::underline)) em_codes[3] = 4; + if (has_emphasis(em, emphasis::blink)) em_codes[4] = 5; + if (has_emphasis(em, emphasis::reverse)) em_codes[5] = 7; + if (has_emphasis(em, emphasis::conceal)) em_codes[6] = 8; + if (has_emphasis(em, emphasis::strikethrough)) em_codes[7] = 9; + + size_t index = 0; + for 
(size_t i = 0; i < num_emphases; ++i) { + if (!em_codes[i]) continue; + buffer[index++] = static_cast('\x1b'); + buffer[index++] = static_cast('['); + buffer[index++] = static_cast('0' + em_codes[i]); + buffer[index++] = static_cast('m'); + } + buffer[index++] = static_cast(0); + } + FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } + + FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } + FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { + return buffer + std::char_traits::length(buffer); + } + + private: + static constexpr size_t num_emphases = 8; + Char buffer[7u + 3u * num_emphases + 1u]; + + static FMT_CONSTEXPR void to_esc(uint8_t c, Char* out, + char delimiter) noexcept { + out[0] = static_cast('0' + c / 100); + out[1] = static_cast('0' + c / 10 % 10); + out[2] = static_cast('0' + c % 10); + out[3] = static_cast(delimiter); + } + static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + return static_cast(em) & static_cast(mask); + } +}; + +template +FMT_CONSTEXPR ansi_color_escape make_foreground_color( + detail::color_type foreground) noexcept { + return ansi_color_escape(foreground, "\x1b[38;2;"); +} + +template +FMT_CONSTEXPR ansi_color_escape make_background_color( + detail::color_type background) noexcept { + return ansi_color_escape(background, "\x1b[48;2;"); +} + +template +FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { + return ansi_color_escape(em); +} + +template inline void fputs(const Char* chars, FILE* stream) { + int result = std::fputs(chars, stream); + if (result < 0) + FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); +} + +template <> inline void fputs(const wchar_t* chars, FILE* stream) { + int result = std::fputws(chars, stream); + if (result < 0) + FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); +} + +template inline void reset_color(FILE* stream) { + fputs("\x1b[0m", stream); +} + +template <> inline void 
reset_color(FILE* stream) { + fputs(L"\x1b[0m", stream); +} + +template inline void reset_color(buffer& buffer) { + auto reset_color = string_view("\x1b[0m"); + buffer.append(reset_color.begin(), reset_color.end()); +} + +template struct styled_arg { + const T& value; + text_style style; +}; + +template +void vformat_to(buffer& buf, const text_style& ts, + basic_string_view format_str, + basic_format_args>> args) { + bool has_style = false; + if (ts.has_emphasis()) { + has_style = true; + auto emphasis = detail::make_emphasis(ts.get_emphasis()); + buf.append(emphasis.begin(), emphasis.end()); + } + if (ts.has_foreground()) { + has_style = true; + auto foreground = detail::make_foreground_color(ts.get_foreground()); + buf.append(foreground.begin(), foreground.end()); + } + if (ts.has_background()) { + has_style = true; + auto background = detail::make_background_color(ts.get_background()); + buf.append(background.begin(), background.end()); + } + detail::vformat_to(buf, format_str, args, {}); + if (has_style) detail::reset_color(buf); +} + +FMT_END_DETAIL_NAMESPACE + +template > +void vprint(std::FILE* f, const text_style& ts, const S& format, + basic_format_args>> args) { + basic_memory_buffer buf; + detail::vformat_to(buf, ts, detail::to_string_view(format), args); + if (detail::is_utf8()) { + detail::print(f, basic_string_view(buf.begin(), buf.size())); + } else { + buf.push_back(Char(0)); + detail::fputs(buf.data(), f); + } +} + +/** + \rst + Formats a string and prints it to the specified file stream using ANSI + escape sequences to specify text formatting. + + **Example**:: + + fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + "Elapsed time: {0:.2f} seconds", 1.23); + \endrst + */ +template ::value)> +void print(std::FILE* f, const text_style& ts, const S& format_str, + const Args&... 
args) { + vprint(f, ts, format_str, + fmt::make_format_args>>(args...)); +} + +/** + \rst + Formats a string and prints it to stdout using ANSI escape sequences to + specify text formatting. + + **Example**:: + + fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + "Elapsed time: {0:.2f} seconds", 1.23); + \endrst + */ +template ::value)> +void print(const text_style& ts, const S& format_str, const Args&... args) { + return print(stdout, ts, format_str, args...); +} + +template > +inline std::basic_string vformat( + const text_style& ts, const S& format_str, + basic_format_args>> args) { + basic_memory_buffer buf; + detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); + return fmt::to_string(buf); +} + +/** + \rst + Formats arguments and returns the result as a string using ANSI + escape sequences to specify text formatting. + + **Example**:: + + #include + std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red), + "The answer is {}", 42); + \endrst +*/ +template > +inline std::basic_string format(const text_style& ts, const S& format_str, + const Args&... args) { + return fmt::vformat(ts, detail::to_string_view(format_str), + fmt::make_format_args>(args...)); +} + +/** + Formats a string with the given text_style and writes the output to ``out``. + */ +template ::value)> +OutputIt vformat_to( + OutputIt out, const text_style& ts, basic_string_view format_str, + basic_format_args>> args) { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, ts, format_str, args); + return detail::get_iterator(buf, out); +} + +/** + \rst + Formats arguments with the given text_style, writes the result to the output + iterator ``out`` and returns the iterator past the end of the output range. 
+ + **Example**:: + + std::vector out; + fmt::format_to(std::back_inserter(out), + fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); + \endrst +*/ +template >::value&& + detail::is_string::value> +inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, + Args&&... args) -> + typename std::enable_if::type { + return vformat_to(out, ts, detail::to_string_view(format_str), + fmt::make_format_args>>(args...)); +} + +template +struct formatter, Char> : formatter { + template + auto format(const detail::styled_arg& arg, FormatContext& ctx) const + -> decltype(ctx.out()) { + const auto& ts = arg.style; + const auto& value = arg.value; + auto out = ctx.out(); + + bool has_style = false; + if (ts.has_emphasis()) { + has_style = true; + auto emphasis = detail::make_emphasis(ts.get_emphasis()); + out = std::copy(emphasis.begin(), emphasis.end(), out); + } + if (ts.has_foreground()) { + has_style = true; + auto foreground = + detail::make_foreground_color(ts.get_foreground()); + out = std::copy(foreground.begin(), foreground.end(), out); + } + if (ts.has_background()) { + has_style = true; + auto background = + detail::make_background_color(ts.get_background()); + out = std::copy(background.begin(), background.end(), out); + } + out = formatter::format(value, ctx); + if (has_style) { + auto reset_color = string_view("\x1b[0m"); + out = std::copy(reset_color.begin(), reset_color.end(), out); + } + return out; + } +}; + +/** + \rst + Returns an argument that will be formatted using ANSI escape sequences, + to be used in a formatting function. 
+ + **Example**:: + + fmt::print("Elapsed time: {0:.2f} seconds", + fmt::styled(1.23, fmt::fg(fmt::color::green) | + fmt::bg(fmt::color::blue))); + \endrst + */ +template +FMT_CONSTEXPR auto styled(const T& value, text_style ts) + -> detail::styled_arg> { + return detail::styled_arg>{value, ts}; +} + +FMT_MODULE_EXPORT_END +FMT_END_NAMESPACE + +#endif // FMT_COLOR_H_ diff --git a/libkram/fmt/compile.h b/libkram/fmt/compile.h new file mode 100644 index 00000000..933668c4 --- /dev/null +++ b/libkram/fmt/compile.h @@ -0,0 +1,611 @@ +// Formatting library for C++ - experimental format string compilation +// +// Copyright (c) 2012 - present, Victor Zverovich and fmt contributors +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_COMPILE_H_ +#define FMT_COMPILE_H_ + +#include "format.h" + +FMT_BEGIN_NAMESPACE +namespace detail { + +template +FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, + counting_iterator it) { + return it + (end - begin); +} + +template class truncating_iterator_base { + protected: + OutputIt out_; + size_t limit_; + size_t count_ = 0; + + truncating_iterator_base() : out_(), limit_(0) {} + + truncating_iterator_base(OutputIt out, size_t limit) + : out_(out), limit_(limit) {} + + public: + using iterator_category = std::output_iterator_tag; + using value_type = typename std::iterator_traits::value_type; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = void; + FMT_UNCHECKED_ITERATOR(truncating_iterator_base); + + OutputIt base() const { return out_; } + size_t count() const { return count_; } +}; + +// An output iterator that truncates the output and counts the number of objects +// written to it. 
+template ::value_type>::type> +class truncating_iterator; + +template +class truncating_iterator + : public truncating_iterator_base { + mutable typename truncating_iterator_base::value_type blackhole_; + + public: + using value_type = typename truncating_iterator_base::value_type; + + truncating_iterator() = default; + + truncating_iterator(OutputIt out, size_t limit) + : truncating_iterator_base(out, limit) {} + + truncating_iterator& operator++() { + if (this->count_++ < this->limit_) ++this->out_; + return *this; + } + + truncating_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + value_type& operator*() const { + return this->count_ < this->limit_ ? *this->out_ : blackhole_; + } +}; + +template +class truncating_iterator + : public truncating_iterator_base { + public: + truncating_iterator() = default; + + truncating_iterator(OutputIt out, size_t limit) + : truncating_iterator_base(out, limit) {} + + template truncating_iterator& operator=(T val) { + if (this->count_++ < this->limit_) *this->out_++ = val; + return *this; + } + + truncating_iterator& operator++() { return *this; } + truncating_iterator& operator++(int) { return *this; } + truncating_iterator& operator*() { return *this; } +}; + +// A compile-time string which is compiled into fast formatting code. +class compiled_string {}; + +template +struct is_compiled_string : std::is_base_of {}; + +/** + \rst + Converts a string literal *s* into a format string that will be parsed at + compile time and converted into efficient formatting code. Requires C++17 + ``constexpr if`` compiler support. + + **Example**:: + + // Converts 42 into std::string using the most efficient method and no + // runtime format string processing. 
+ std::string s = fmt::format(FMT_COMPILE("{}"), 42); + \endrst + */ +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) +# define FMT_COMPILE(s) \ + FMT_STRING_IMPL(s, fmt::detail::compiled_string, explicit) +#else +# define FMT_COMPILE(s) FMT_STRING(s) +#endif + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template Str> +struct udl_compiled_string : compiled_string { + using char_type = Char; + explicit constexpr operator basic_string_view() const { + return {Str.data, N - 1}; + } +}; +#endif + +template +const T& first(const T& value, const Tail&...) { + return value; +} + +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) +template struct type_list {}; + +// Returns a reference to the argument at index N from [first, rest...]. +template +constexpr const auto& get([[maybe_unused]] const T& first, + [[maybe_unused]] const Args&... rest) { + static_assert(N < 1 + sizeof...(Args), "index is out of bounds"); + if constexpr (N == 0) + return first; + else + return detail::get(rest...); +} + +template +constexpr int get_arg_index_by_name(basic_string_view name, + type_list) { + return get_arg_index_by_name(name); +} + +template struct get_type_impl; + +template struct get_type_impl> { + using type = + remove_cvref_t(std::declval()...))>; +}; + +template +using get_type = typename get_type_impl::type; + +template struct is_compiled_format : std::false_type {}; + +template struct text { + basic_string_view data; + using char_type = Char; + + template + constexpr OutputIt format(OutputIt out, const Args&...) const { + return write(out, data); + } +}; + +template +struct is_compiled_format> : std::true_type {}; + +template +constexpr text make_text(basic_string_view s, size_t pos, + size_t size) { + return {{&s[pos], size}}; +} + +template struct code_unit { + Char value; + using char_type = Char; + + template + constexpr OutputIt format(OutputIt out, const Args&...) 
const { + return write(out, value); + } +}; + +// This ensures that the argument type is convertible to `const T&`. +template +constexpr const T& get_arg_checked(const Args&... args) { + const auto& arg = detail::get(args...); + if constexpr (detail::is_named_arg>()) { + return arg.value; + } else { + return arg; + } +} + +template +struct is_compiled_format> : std::true_type {}; + +// A replacement field that refers to argument N. +template struct field { + using char_type = Char; + + template + constexpr OutputIt format(OutputIt out, const Args&... args) const { + return write(out, get_arg_checked(args...)); + } +}; + +template +struct is_compiled_format> : std::true_type {}; + +// A replacement field that refers to argument with name. +template struct runtime_named_field { + using char_type = Char; + basic_string_view name; + + template + constexpr static bool try_format_argument( + OutputIt& out, + // [[maybe_unused]] due to unused-but-set-parameter warning in GCC 7,8,9 + [[maybe_unused]] basic_string_view arg_name, const T& arg) { + if constexpr (is_named_arg::type>::value) { + if (arg_name == arg.name) { + out = write(out, arg.value); + return true; + } + } + return false; + } + + template + constexpr OutputIt format(OutputIt out, const Args&... args) const { + bool found = (try_format_argument(out, name, args) || ...); + if (!found) { + FMT_THROW(format_error("argument with specified name is not found")); + } + return out; + } +}; + +template +struct is_compiled_format> : std::true_type {}; + +// A replacement field that refers to argument N and has format specifiers. +template struct spec_field { + using char_type = Char; + formatter fmt; + + template + constexpr FMT_INLINE OutputIt format(OutputIt out, + const Args&... 
args) const { + const auto& vargs = + fmt::make_format_args>(args...); + basic_format_context ctx(out, vargs); + return fmt.format(get_arg_checked(args...), ctx); + } +}; + +template +struct is_compiled_format> : std::true_type {}; + +template struct concat { + L lhs; + R rhs; + using char_type = typename L::char_type; + + template + constexpr OutputIt format(OutputIt out, const Args&... args) const { + out = lhs.format(out, args...); + return rhs.format(out, args...); + } +}; + +template +struct is_compiled_format> : std::true_type {}; + +template +constexpr concat make_concat(L lhs, R rhs) { + return {lhs, rhs}; +} + +struct unknown_format {}; + +template +constexpr size_t parse_text(basic_string_view str, size_t pos) { + for (size_t size = str.size(); pos != size; ++pos) { + if (str[pos] == '{' || str[pos] == '}') break; + } + return pos; +} + +template +constexpr auto compile_format_string(S format_str); + +template +constexpr auto parse_tail(T head, S format_str) { + if constexpr (POS != + basic_string_view(format_str).size()) { + constexpr auto tail = compile_format_string(format_str); + if constexpr (std::is_same, + unknown_format>()) + return tail; + else + return make_concat(head, tail); + } else { + return head; + } +} + +template struct parse_specs_result { + formatter fmt; + size_t end; + int next_arg_id; +}; + +constexpr int manual_indexing_id = -1; + +template +constexpr parse_specs_result parse_specs(basic_string_view str, + size_t pos, int next_arg_id) { + str.remove_prefix(pos); + auto ctx = compile_parse_context(str, max_value(), nullptr, {}, + next_arg_id); + auto f = formatter(); + auto end = f.parse(ctx); + return {f, pos + fmt::detail::to_unsigned(end - str.data()), + next_arg_id == 0 ? 
manual_indexing_id : ctx.next_arg_id()}; +} + +template struct arg_id_handler { + arg_ref arg_id; + + constexpr int operator()() { + FMT_ASSERT(false, "handler cannot be used with automatic indexing"); + return 0; + } + constexpr int operator()(int id) { + arg_id = arg_ref(id); + return 0; + } + constexpr int operator()(basic_string_view id) { + arg_id = arg_ref(id); + return 0; + } + + constexpr void on_error(const char* message) { + FMT_THROW(format_error(message)); + } +}; + +template struct parse_arg_id_result { + arg_ref arg_id; + const Char* arg_id_end; +}; + +template +constexpr auto parse_arg_id(const Char* begin, const Char* end) { + auto handler = arg_id_handler{arg_ref{}}; + auto arg_id_end = parse_arg_id(begin, end, handler); + return parse_arg_id_result{handler.arg_id, arg_id_end}; +} + +template struct field_type { + using type = remove_cvref_t; +}; + +template +struct field_type::value>> { + using type = remove_cvref_t; +}; + +template +constexpr auto parse_replacement_field_then_tail(S format_str) { + using char_type = typename S::char_type; + constexpr auto str = basic_string_view(format_str); + constexpr char_type c = END_POS != str.size() ? str[END_POS] : char_type(); + if constexpr (c == '}') { + return parse_tail( + field::type, ARG_INDEX>(), + format_str); + } else if constexpr (c != ':') { + FMT_THROW(format_error("expected ':'")); + } else { + constexpr auto result = parse_specs::type>( + str, END_POS + 1, NEXT_ID == manual_indexing_id ? 0 : NEXT_ID); + if constexpr (result.end >= str.size() || str[result.end] != '}') { + FMT_THROW(format_error("expected '}'")); + return 0; + } else { + return parse_tail( + spec_field::type, ARG_INDEX>{ + result.fmt}, + format_str); + } + } +} + +// Compiles a non-empty format string and returns the compiled representation +// or unknown_format() on unrecognized input. 
+template +constexpr auto compile_format_string(S format_str) { + using char_type = typename S::char_type; + constexpr auto str = basic_string_view(format_str); + if constexpr (str[POS] == '{') { + if constexpr (POS + 1 == str.size()) + FMT_THROW(format_error("unmatched '{' in format string")); + if constexpr (str[POS + 1] == '{') { + return parse_tail(make_text(str, POS, 1), format_str); + } else if constexpr (str[POS + 1] == '}' || str[POS + 1] == ':') { + static_assert(ID != manual_indexing_id, + "cannot switch from manual to automatic argument indexing"); + constexpr auto next_id = + ID != manual_indexing_id ? ID + 1 : manual_indexing_id; + return parse_replacement_field_then_tail, Args, + POS + 1, ID, next_id>( + format_str); + } else { + constexpr auto arg_id_result = + parse_arg_id(str.data() + POS + 1, str.data() + str.size()); + constexpr auto arg_id_end_pos = arg_id_result.arg_id_end - str.data(); + constexpr char_type c = + arg_id_end_pos != str.size() ? str[arg_id_end_pos] : char_type(); + static_assert(c == '}' || c == ':', "missing '}' in format string"); + if constexpr (arg_id_result.arg_id.kind == arg_id_kind::index) { + static_assert( + ID == manual_indexing_id || ID == 0, + "cannot switch from automatic to manual argument indexing"); + constexpr auto arg_index = arg_id_result.arg_id.val.index; + return parse_replacement_field_then_tail, + Args, arg_id_end_pos, + arg_index, manual_indexing_id>( + format_str); + } else if constexpr (arg_id_result.arg_id.kind == arg_id_kind::name) { + constexpr auto arg_index = + get_arg_index_by_name(arg_id_result.arg_id.val.name, Args{}); + if constexpr (arg_index != invalid_arg_index) { + constexpr auto next_id = + ID != manual_indexing_id ? 
ID + 1 : manual_indexing_id; + return parse_replacement_field_then_tail< + decltype(get_type::value), Args, arg_id_end_pos, + arg_index, next_id>(format_str); + } else { + if constexpr (c == '}') { + return parse_tail( + runtime_named_field{arg_id_result.arg_id.val.name}, + format_str); + } else if constexpr (c == ':') { + return unknown_format(); // no type info for specs parsing + } + } + } + } + } else if constexpr (str[POS] == '}') { + if constexpr (POS + 1 == str.size()) + FMT_THROW(format_error("unmatched '}' in format string")); + return parse_tail(make_text(str, POS, 1), format_str); + } else { + constexpr auto end = parse_text(str, POS + 1); + if constexpr (end - POS > 1) { + return parse_tail(make_text(str, POS, end - POS), + format_str); + } else { + return parse_tail(code_unit{str[POS]}, + format_str); + } + } +} + +template ::value)> +constexpr auto compile(S format_str) { + constexpr auto str = basic_string_view(format_str); + if constexpr (str.size() == 0) { + return detail::make_text(str, 0, 0); + } else { + constexpr auto result = + detail::compile_format_string, 0, 0>( + format_str); + return result; + } +} +#endif // defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) +} // namespace detail + +FMT_MODULE_EXPORT_BEGIN + +#if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) + +template ::value)> +FMT_INLINE std::basic_string format(const CompiledFormat& cf, + const Args&... args) { + auto s = std::basic_string(); + cf.format(std::back_inserter(s), args...); + return s; +} + +template ::value)> +constexpr FMT_INLINE OutputIt format_to(OutputIt out, const CompiledFormat& cf, + const Args&... args) { + return cf.format(out, args...); +} + +template ::value)> +FMT_INLINE std::basic_string format(const S&, + Args&&... 
args) { + if constexpr (std::is_same::value) { + constexpr auto str = basic_string_view(S()); + if constexpr (str.size() == 2 && str[0] == '{' && str[1] == '}') { + const auto& first = detail::first(args...); + if constexpr (detail::is_named_arg< + remove_cvref_t>::value) { + return fmt::to_string(first.value); + } else { + return fmt::to_string(first); + } + } + } + constexpr auto compiled = detail::compile(S()); + if constexpr (std::is_same, + detail::unknown_format>()) { + return fmt::format( + static_cast>(S()), + std::forward(args)...); + } else { + return fmt::format(compiled, std::forward(args)...); + } +} + +template ::value)> +FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { + constexpr auto compiled = detail::compile(S()); + if constexpr (std::is_same, + detail::unknown_format>()) { + return fmt::format_to( + out, static_cast>(S()), + std::forward(args)...); + } else { + return fmt::format_to(out, compiled, std::forward(args)...); + } +} +#endif + +template ::value)> +format_to_n_result format_to_n(OutputIt out, size_t n, + const S& format_str, Args&&... args) { + auto it = fmt::format_to(detail::truncating_iterator(out, n), + format_str, std::forward(args)...); + return {it.base(), it.count()}; +} + +template ::value)> +FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, + const Args&... args) { + return fmt::format_to(detail::counting_iterator(), format_str, args...) + .count(); +} + +template ::value)> +void print(std::FILE* f, const S& format_str, const Args&... args) { + memory_buffer buffer; + fmt::format_to(std::back_inserter(buffer), format_str, args...); + detail::print(f, {buffer.data(), buffer.size()}); +} + +template ::value)> +void print(const S& format_str, const Args&... 
args) { + print(stdout, format_str, args...); +} + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +inline namespace literals { +template constexpr auto operator""_cf() { + using char_t = remove_cvref_t; + return detail::udl_compiled_string(); +} +} // namespace literals +#endif + +FMT_MODULE_EXPORT_END +FMT_END_NAMESPACE + +#endif // FMT_COMPILE_H_ diff --git a/libkram/fmt/core.h b/libkram/fmt/core.h new file mode 100644 index 00000000..549f948f --- /dev/null +++ b/libkram/fmt/core.h @@ -0,0 +1,3338 @@ +// Formatting library for C++ - the core API for char/UTF-8 +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_CORE_H_ +#define FMT_CORE_H_ + +#include // std::byte +#include // std::FILE +#include // std::strlen +#include +#include +#include +#include + +// The fmt library version in the form major * 10000 + minor * 100 + patch. +#define FMT_VERSION 90101 + +#if defined(__clang__) && !defined(__ibmxl__) +# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +# define FMT_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \ + !defined(__NVCOMPILER) +# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define FMT_GCC_VERSION 0 +#endif + +#ifndef FMT_GCC_PRAGMA +// Workaround _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884. +# if FMT_GCC_VERSION >= 504 +# define FMT_GCC_PRAGMA(arg) _Pragma(arg) +# else +# define FMT_GCC_PRAGMA(arg) +# endif +#endif + +#ifdef __ICL +# define FMT_ICC_VERSION __ICL +#elif defined(__INTEL_COMPILER) +# define FMT_ICC_VERSION __INTEL_COMPILER +#else +# define FMT_ICC_VERSION 0 +#endif + +#ifdef _MSC_VER +# define FMT_MSC_VERSION _MSC_VER +# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) +#else +# define FMT_MSC_VERSION 0 +# define FMT_MSC_WARNING(...) 
+#endif + +#ifdef _MSVC_LANG +# define FMT_CPLUSPLUS _MSVC_LANG +#else +# define FMT_CPLUSPLUS __cplusplus +#endif + +#ifdef __has_feature +# define FMT_HAS_FEATURE(x) __has_feature(x) +#else +# define FMT_HAS_FEATURE(x) 0 +#endif + +#if defined(__has_include) || FMT_ICC_VERSION >= 1600 || FMT_MSC_VERSION > 1900 +# define FMT_HAS_INCLUDE(x) __has_include(x) +#else +# define FMT_HAS_INCLUDE(x) 0 +#endif + +#ifdef __has_cpp_attribute +# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define FMT_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +// Check if relaxed C++14 constexpr is supported. +// GCC doesn't allow throw in constexpr until version 6 (bug 67371). +#ifndef FMT_USE_CONSTEXPR +# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \ + (FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \ + !FMT_ICC_VERSION && !defined(__NVCC__) +# define FMT_USE_CONSTEXPR 1 +# else +# define FMT_USE_CONSTEXPR 0 +# endif +#endif +#if FMT_USE_CONSTEXPR +# define FMT_CONSTEXPR constexpr +#else +# define FMT_CONSTEXPR +#endif + +#if ((FMT_CPLUSPLUS >= 202002L) && \ + (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ + (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) +# define FMT_CONSTEXPR20 constexpr +#else +# define FMT_CONSTEXPR20 +#endif + +// Check if constexpr std::char_traits<>::{compare,length} are supported. +#if defined(__GLIBCXX__) +# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ + _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. 
+# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +# endif +#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ + _LIBCPP_VERSION >= 4000 +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L +# define FMT_CONSTEXPR_CHAR_TRAITS constexpr +#endif +#ifndef FMT_CONSTEXPR_CHAR_TRAITS +# define FMT_CONSTEXPR_CHAR_TRAITS +#endif + +// Check if exceptions are disabled. +#ifndef FMT_EXCEPTIONS +# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ + (FMT_MSC_VERSION && !_HAS_EXCEPTIONS) +# define FMT_EXCEPTIONS 0 +# else +# define FMT_EXCEPTIONS 1 +# endif +#endif + +#ifndef FMT_DEPRECATED +# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 +# define FMT_DEPRECATED [[deprecated]] +# else +# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) +# define FMT_DEPRECATED __attribute__((deprecated)) +# elif FMT_MSC_VERSION +# define FMT_DEPRECATED __declspec(deprecated) +# else +# define FMT_DEPRECATED /* deprecated */ +# endif +# endif +#endif + +// [[noreturn]] is disabled on MSVC and NVCC because of bogus unreachable code +// warnings. 
+#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && \ + !defined(__NVCC__) +# define FMT_NORETURN [[noreturn]] +#else +# define FMT_NORETURN +#endif + +#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough) +# define FMT_FALLTHROUGH [[fallthrough]] +#elif defined(__clang__) +# define FMT_FALLTHROUGH [[clang::fallthrough]] +#elif FMT_GCC_VERSION >= 700 && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) +# define FMT_FALLTHROUGH [[gnu::fallthrough]] +#else +# define FMT_FALLTHROUGH +#endif + +#ifndef FMT_NODISCARD +# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) +# define FMT_NODISCARD [[nodiscard]] +# else +# define FMT_NODISCARD +# endif +#endif + +#ifndef FMT_USE_FLOAT +# define FMT_USE_FLOAT 1 +#endif +#ifndef FMT_USE_DOUBLE +# define FMT_USE_DOUBLE 1 +#endif +#ifndef FMT_USE_LONG_DOUBLE +# define FMT_USE_LONG_DOUBLE 1 +#endif + +#ifndef FMT_INLINE +# if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_INLINE inline __attribute__((always_inline)) +# else +# define FMT_INLINE inline +# endif +#endif + +// An inline std::forward replacement. +#define FMT_FORWARD(...) static_cast(__VA_ARGS__) + +#ifdef _MSC_VER +# define FMT_UNCHECKED_ITERATOR(It) \ + using _Unchecked_type = It // Mark iterator as checked. 
+#else +# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# define FMT_BEGIN_NAMESPACE \ + namespace fmt { \ + inline namespace v9 { +# define FMT_END_NAMESPACE \ + } \ + } +#endif + +#ifndef FMT_MODULE_EXPORT +# define FMT_MODULE_EXPORT +# define FMT_MODULE_EXPORT_BEGIN +# define FMT_MODULE_EXPORT_END +# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail { +# define FMT_END_DETAIL_NAMESPACE } +#endif + +#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) +# define FMT_CLASS_API FMT_MSC_WARNING(suppress : 4275) +# ifdef FMT_EXPORT +# define FMT_API __declspec(dllexport) +# elif defined(FMT_SHARED) +# define FMT_API __declspec(dllimport) +# endif +#else +# define FMT_CLASS_API +# if defined(FMT_EXPORT) || defined(FMT_SHARED) +# if defined(__GNUC__) || defined(__clang__) +# define FMT_API __attribute__((visibility("default"))) +# endif +# endif +#endif +#ifndef FMT_API +# define FMT_API +#endif + +// libc++ supports string_view in pre-c++17. +#if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L +# include +# define FMT_USE_EXPERIMENTAL_STRING_VIEW +#endif + +#ifndef FMT_UNICODE +# define FMT_UNICODE !FMT_MSC_VERSION +#endif + +#ifndef FMT_CONSTEVAL +# if ((FMT_GCC_VERSION >= 1000 || FMT_CLANG_VERSION >= 1101) && \ + FMT_CPLUSPLUS >= 202002L && !defined(__apple_build_version__)) || \ + (defined(__cpp_consteval) && \ + (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) +// consteval is broken in MSVC before VS2022 and Apple clang 13. 
+# define FMT_CONSTEVAL consteval +# define FMT_HAS_CONSTEVAL +# else +# define FMT_CONSTEVAL +# endif +#endif + +#ifndef FMT_USE_NONTYPE_TEMPLATE_ARGS +# if defined(__cpp_nontype_template_args) && \ + ((FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L) || \ + __cpp_nontype_template_args >= 201911L) && \ + !defined(__NVCOMPILER) && !defined(__LCC__) +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +# else +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +# endif +#endif + +// Enable minimal optimizations for more compact code in debug mode. +FMT_GCC_PRAGMA("GCC push_options") +#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) +FMT_GCC_PRAGMA("GCC optimize(\"Og\")") +#endif + +FMT_BEGIN_NAMESPACE +FMT_MODULE_EXPORT_BEGIN + +// Implementations of enable_if_t and other metafunctions for older systems. +template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; +template using bool_constant = std::integral_constant; +template +using remove_reference_t = typename std::remove_reference::type; +template +using remove_const_t = typename std::remove_const::type; +template +using remove_cvref_t = typename std::remove_cv>::type; +template struct type_identity { using type = T; }; +template using type_identity_t = typename type_identity::type; +template +using underlying_t = typename std::underlying_type::type; + +template struct disjunction : std::false_type {}; +template struct disjunction

: P {}; +template +struct disjunction + : conditional_t> {}; + +template struct conjunction : std::true_type {}; +template struct conjunction

: P {}; +template +struct conjunction + : conditional_t, P1> {}; + +struct monostate { + constexpr monostate() {} +}; + +// An enable_if helper to be used in template parameters which results in much +// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed +// to workaround a bug in MSVC 2019 (see #1140 and #1186). +#ifdef FMT_DOC +# define FMT_ENABLE_IF(...) +#else +# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 +#endif + +FMT_BEGIN_DETAIL_NAMESPACE + +// Suppresses "unused variable" warnings with the method described in +// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. +// (void)var does not work on many Intel compilers. +template FMT_CONSTEXPR void ignore_unused(const T&...) {} + +constexpr FMT_INLINE auto is_constant_evaluated( + bool default_value = false) noexcept -> bool { +#ifdef __cpp_lib_is_constant_evaluated + ignore_unused(default_value); + return std::is_constant_evaluated(); +#else + return default_value; +#endif +} + +// Suppresses "conditional expression is constant" warnings. +template constexpr FMT_INLINE auto const_check(T value) -> T { + return value; +} + +FMT_NORETURN FMT_API void assert_fail(const char* file, int line, + const char* message); + +#ifndef FMT_ASSERT +# ifdef NDEBUG +// FMT_ASSERT is not empty to avoid -Wempty-body. +# define FMT_ASSERT(condition, message) \ + ::fmt::detail::ignore_unused((condition), (message)) +# else +# define FMT_ASSERT(condition, message) \ + ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ + ? (void)0 \ + : ::fmt::detail::assert_fail(__FILE__, __LINE__, (message))) +# endif +#endif + +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) +template +using std_string_view = std::experimental::basic_string_view; +#else +template struct std_string_view {}; +#endif + +#ifdef FMT_USE_INT128 +// Do nothing. 
+#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ + !(FMT_CLANG_VERSION && FMT_MSC_VERSION) +# define FMT_USE_INT128 1 +using int128_opt = __int128_t; // An optional native 128-bit integer. +using uint128_opt = __uint128_t; +template inline auto convert_for_visit(T value) -> T { + return value; +} +#else +# define FMT_USE_INT128 0 +#endif +#if !FMT_USE_INT128 +enum class int128_opt {}; +enum class uint128_opt {}; +// Reduce template instantiations. +template auto convert_for_visit(T) -> monostate { return {}; } +#endif + +// Casts a nonnegative integer to unsigned. +template +FMT_CONSTEXPR auto to_unsigned(Int value) -> + typename std::make_unsigned::type { + FMT_ASSERT(std::is_unsigned::value || value >= 0, "negative value"); + return static_cast::type>(value); +} + +FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char micro[] = "\u00B5"; + +constexpr auto is_utf8() -> bool { + // Avoid buggy sign extensions in MSVC's constant evaluation mode (#2297). + using uchar = unsigned char; + return FMT_UNICODE || (sizeof(micro) == 3 && uchar(micro[0]) == 0xC2 && + uchar(micro[1]) == 0xB5); +} +FMT_END_DETAIL_NAMESPACE + +/** + An implementation of ``std::basic_string_view`` for pre-C++17. It provides a + subset of the API. ``fmt::basic_string_view`` is used for format strings even + if ``std::string_view`` is available to prevent issues when a library is + compiled with a different ``-std`` option than the client code (which is not + recommended). + */ +template class basic_string_view { + private: + const Char* data_; + size_t size_; + + public: + using value_type = Char; + using iterator = const Char*; + + constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} + + /** Constructs a string reference object from a C string and a size. 
*/ + constexpr basic_string_view(const Char* s, size_t count) noexcept + : data_(s), size_(count) {} + + /** + \rst + Constructs a string reference object from a C string computing + the size with ``std::char_traits::length``. + \endrst + */ + FMT_CONSTEXPR_CHAR_TRAITS + FMT_INLINE + basic_string_view(const Char* s) + : data_(s), + size_(detail::const_check(std::is_same::value && + !detail::is_constant_evaluated(true)) + ? std::strlen(reinterpret_cast(s)) + : std::char_traits::length(s)) {} + + /** Constructs a string reference from a ``std::basic_string`` object. */ + template + FMT_CONSTEXPR basic_string_view( + const std::basic_string& s) noexcept + : data_(s.data()), size_(s.size()) {} + + template >::value)> + FMT_CONSTEXPR basic_string_view(S s) noexcept + : data_(s.data()), size_(s.size()) {} + + /** Returns a pointer to the string data. */ + constexpr auto data() const noexcept -> const Char* { return data_; } + + /** Returns the string size. */ + constexpr auto size() const noexcept -> size_t { return size_; } + + constexpr auto begin() const noexcept -> iterator { return data_; } + constexpr auto end() const noexcept -> iterator { return data_ + size_; } + + constexpr auto operator[](size_t pos) const noexcept -> const Char& { + return data_[pos]; + } + + FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { + data_ += n; + size_ -= n; + } + + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( + basic_string_view sv) const noexcept { + return size_ >= sv.size_ && + std::char_traits::compare(data_, sv.data_, sv.size_) == 0; + } + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { + return size_ >= 1 && std::char_traits::eq(*data_, c); + } + FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { + return starts_with(basic_string_view(s)); + } + + // Lexicographically compare this string reference to other. + FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { + size_t str_size = size_ < other.size_ ? 
size_ : other.size_; + int result = std::char_traits::compare(data_, other.data_, str_size); + if (result == 0) + result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); + return result; + } + + FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, + basic_string_view rhs) + -> bool { + return lhs.compare(rhs) == 0; + } + friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) != 0; + } + friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) < 0; + } + friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) <= 0; + } + friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) > 0; + } + friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) >= 0; + } +}; + +using string_view = basic_string_view; + +/** Specifies if ``T`` is a character type. Can be specialized by users. */ +template struct is_char : std::false_type {}; +template <> struct is_char : std::true_type {}; + +FMT_BEGIN_DETAIL_NAMESPACE + +// A base class for compile-time strings. +struct compile_string {}; + +template +struct is_compile_string : std::is_base_of {}; + +// Returns a string view of `s`. 
+template ::value)> +FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { + return s; +} +template +inline auto to_string_view(const std::basic_string& s) + -> basic_string_view { + return s; +} +template +constexpr auto to_string_view(basic_string_view s) + -> basic_string_view { + return s; +} +template >::value)> +inline auto to_string_view(std_string_view s) -> basic_string_view { + return s; +} +template ::value)> +constexpr auto to_string_view(const S& s) + -> basic_string_view { + return basic_string_view(s); +} +void to_string_view(...); + +// Specifies whether S is a string type convertible to fmt::basic_string_view. +// It should be a constexpr function but MSVC 2017 fails to compile it in +// enable_if and MSVC 2015 fails to compile it as an alias template. +// ADL invocation of to_string_view is DEPRECATED! +template +struct is_string : std::is_class()))> { +}; + +template struct char_t_impl {}; +template struct char_t_impl::value>> { + using result = decltype(to_string_view(std::declval())); + using type = typename result::value_type; +}; + +enum class type { + none_type, + // Integer types should go first, + int_type, + uint_type, + long_long_type, + ulong_long_type, + int128_type, + uint128_type, + bool_type, + char_type, + last_integer_type = char_type, + // followed by floating-point types. + float_type, + double_type, + long_double_type, + last_numeric_type = long_double_type, + cstring_type, + string_type, + pointer_type, + custom_type +}; + +// Maps core type T to the corresponding type enum constant. 
+template +struct type_constant : std::integral_constant {}; + +#define FMT_TYPE_CONSTANT(Type, constant) \ + template \ + struct type_constant \ + : std::integral_constant {} + +FMT_TYPE_CONSTANT(int, int_type); +FMT_TYPE_CONSTANT(unsigned, uint_type); +FMT_TYPE_CONSTANT(long long, long_long_type); +FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); +FMT_TYPE_CONSTANT(int128_opt, int128_type); +FMT_TYPE_CONSTANT(uint128_opt, uint128_type); +FMT_TYPE_CONSTANT(bool, bool_type); +FMT_TYPE_CONSTANT(Char, char_type); +FMT_TYPE_CONSTANT(float, float_type); +FMT_TYPE_CONSTANT(double, double_type); +FMT_TYPE_CONSTANT(long double, long_double_type); +FMT_TYPE_CONSTANT(const Char*, cstring_type); +FMT_TYPE_CONSTANT(basic_string_view, string_type); +FMT_TYPE_CONSTANT(const void*, pointer_type); + +constexpr bool is_integral_type(type t) { + return t > type::none_type && t <= type::last_integer_type; +} + +constexpr bool is_arithmetic_type(type t) { + return t > type::none_type && t <= type::last_numeric_type; +} + +FMT_NORETURN FMT_API void throw_format_error(const char* message); + +struct error_handler { + constexpr error_handler() = default; + constexpr error_handler(const error_handler&) = default; + + // This function is intentionally not constexpr to give a compile-time error. + FMT_NORETURN void on_error(const char* message) { + throw_format_error(message); + } +}; +FMT_END_DETAIL_NAMESPACE + +/** String's character type. */ +template using char_t = typename detail::char_t_impl::type; + +/** + \rst + Parsing context consisting of a format string range being parsed and an + argument counter for automatic indexing. + You can use the ``format_parse_context`` type alias for ``char`` instead. 
+ \endrst + */ +template +class basic_format_parse_context : private ErrorHandler { + private: + basic_string_view format_str_; + int next_arg_id_; + + FMT_CONSTEXPR void do_check_arg_id(int id); + + public: + using char_type = Char; + using iterator = typename basic_string_view::iterator; + + explicit constexpr basic_format_parse_context( + basic_string_view format_str, ErrorHandler eh = {}, + int next_arg_id = 0) + : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {} + + /** + Returns an iterator to the beginning of the format string range being + parsed. + */ + constexpr auto begin() const noexcept -> iterator { + return format_str_.begin(); + } + + /** + Returns an iterator past the end of the format string range being parsed. + */ + constexpr auto end() const noexcept -> iterator { return format_str_.end(); } + + /** Advances the begin iterator to ``it``. */ + FMT_CONSTEXPR void advance_to(iterator it) { + format_str_.remove_prefix(detail::to_unsigned(it - begin())); + } + + /** + Reports an error if using the manual argument indexing; otherwise returns + the next argument index and switches to the automatic indexing. + */ + FMT_CONSTEXPR auto next_arg_id() -> int { + if (next_arg_id_ < 0) { + on_error("cannot switch from manual to automatic argument indexing"); + return 0; + } + int id = next_arg_id_++; + do_check_arg_id(id); + return id; + } + + /** + Reports an error if using the automatic argument indexing; otherwise + switches to the manual indexing. 
+ */ + FMT_CONSTEXPR void check_arg_id(int id) { + if (next_arg_id_ > 0) { + on_error("cannot switch from automatic to manual argument indexing"); + return; + } + next_arg_id_ = -1; + do_check_arg_id(id); + } + FMT_CONSTEXPR void check_arg_id(basic_string_view) {} + FMT_CONSTEXPR void check_dynamic_spec(int arg_id); + + FMT_CONSTEXPR void on_error(const char* message) { + ErrorHandler::on_error(message); + } + + constexpr auto error_handler() const -> ErrorHandler { return *this; } +}; + +using format_parse_context = basic_format_parse_context; + +FMT_BEGIN_DETAIL_NAMESPACE +// A parse context with extra data used only in compile-time checks. +template +class compile_parse_context + : public basic_format_parse_context { + private: + int num_args_; + const type* types_; + using base = basic_format_parse_context; + + public: + explicit FMT_CONSTEXPR compile_parse_context( + basic_string_view format_str, int num_args, const type* types, + ErrorHandler eh = {}, int next_arg_id = 0) + : base(format_str, eh, next_arg_id), num_args_(num_args), types_(types) {} + + constexpr auto num_args() const -> int { return num_args_; } + constexpr auto arg_type(int id) const -> type { return types_[id]; } + + FMT_CONSTEXPR auto next_arg_id() -> int { + int id = base::next_arg_id(); + if (id >= num_args_) this->on_error("argument not found"); + return id; + } + + FMT_CONSTEXPR void check_arg_id(int id) { + base::check_arg_id(id); + if (id >= num_args_) this->on_error("argument not found"); + } + using base::check_arg_id; + + FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { + detail::ignore_unused(arg_id); +#if !defined(__LCC__) + if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) + this->on_error("width/precision is not integer"); +#endif + } +}; +FMT_END_DETAIL_NAMESPACE + +template +FMT_CONSTEXPR void +basic_format_parse_context::do_check_arg_id(int id) { + // Argument id is only checked at compile-time during parsing because + // formatting has its own 
validation. + if (detail::is_constant_evaluated() && FMT_GCC_VERSION >= 1200) { + using context = detail::compile_parse_context; + if (id >= static_cast(this)->num_args()) + on_error("argument not found"); + } +} + +template +FMT_CONSTEXPR void +basic_format_parse_context::check_dynamic_spec(int arg_id) { + if (detail::is_constant_evaluated()) { + using context = detail::compile_parse_context; + static_cast(this)->check_dynamic_spec(arg_id); + } +} + +template class basic_format_arg; +template class basic_format_args; +template class dynamic_format_arg_store; + +// A formatter for objects of type T. +template +struct formatter { + // A deleted default constructor indicates a disabled formatter. + formatter() = delete; +}; + +// Specifies if T has an enabled formatter specialization. A type can be +// formattable even if it doesn't have a formatter e.g. via a conversion. +template +using has_formatter = + std::is_constructible>; + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; +template +struct is_contiguous> : std::true_type {}; + +class appender; + +FMT_BEGIN_DETAIL_NAMESPACE + +template +constexpr auto has_const_formatter_impl(T*) + -> decltype(typename Context::template formatter_type().format( + std::declval(), std::declval()), + true) { + return true; +} +template +constexpr auto has_const_formatter_impl(...) -> bool { + return false; +} +template +constexpr auto has_const_formatter() -> bool { + return has_const_formatter_impl(static_cast(nullptr)); +} + +// Extracts a reference to the container from back_insert_iterator. 
+template +inline auto get_container(std::back_insert_iterator it) + -> Container& { + using base = std::back_insert_iterator; + struct accessor : base { + accessor(base b) : base(b) {} + using base::container; + }; + return *accessor(it).container; +} + +template +FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) + -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template , U>::value&& is_char::value)> +FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { + if (is_constant_evaluated()) return copy_str(begin, end, out); + auto size = to_unsigned(end - begin); + memcpy(out, begin, size * sizeof(U)); + return out + size; +} + +/** + \rst + A contiguous memory buffer with an optional growing ability. It is an internal + class and shouldn't be used directly, only via `~fmt::basic_memory_buffer`. + \endrst + */ +template class buffer { + private: + T* ptr_; + size_t size_; + size_t capacity_; + + protected: + // Don't initialize ptr_ since it is not accessed to save a few cycles. + FMT_MSC_WARNING(suppress : 26495) + buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} + + FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap) {} + + FMT_CONSTEXPR20 ~buffer() = default; + buffer(buffer&&) = default; + + /** Sets the buffer data and capacity. */ + FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { + ptr_ = buf_data; + capacity_ = buf_capacity; + } + + /** Increases the buffer capacity to hold at least *capacity* elements. 
*/ + virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; + + public: + using value_type = T; + using const_reference = const T&; + + buffer(const buffer&) = delete; + void operator=(const buffer&) = delete; + + FMT_INLINE auto begin() noexcept -> T* { return ptr_; } + FMT_INLINE auto end() noexcept -> T* { return ptr_ + size_; } + + FMT_INLINE auto begin() const noexcept -> const T* { return ptr_; } + FMT_INLINE auto end() const noexcept -> const T* { return ptr_ + size_; } + + /** Returns the size of this buffer. */ + constexpr auto size() const noexcept -> size_t { return size_; } + + /** Returns the capacity of this buffer. */ + constexpr auto capacity() const noexcept -> size_t { return capacity_; } + + /** Returns a pointer to the buffer data. */ + FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } + + /** Returns a pointer to the buffer data. */ + FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } + + /** Clears this buffer. */ + void clear() { size_ = 0; } + + // Tries resizing the buffer to contain *count* elements. If T is a POD type + // the new elements may not be initialized. + FMT_CONSTEXPR20 void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? count : capacity_; + } + + // Tries increasing the buffer capacity to *new_capacity*. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. + FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { + if (new_capacity > capacity_) grow(new_capacity); + } + + FMT_CONSTEXPR20 void push_back(const T& value) { + try_reserve(size_ + 1); + ptr_[size_++] = value; + } + + /** Appends data to the end of the buffer. 
*/ + template void append(const U* begin, const U* end); + + template FMT_CONSTEXPR auto operator[](Idx index) -> T& { + return ptr_[index]; + } + template + FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { + return ptr_[index]; + } +}; + +struct buffer_traits { + explicit buffer_traits(size_t) {} + auto count() const -> size_t { return 0; } + auto limit(size_t size) -> size_t { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + auto count() const -> size_t { return count_; } + auto limit(size_t size) -> size_t { + size_t n = limit_ > count_ ? limit_ - count_ : 0; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. +template +class iterator_buffer final : public Traits, public buffer { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() == buffer_size) flush(); + } + + void flush() { + auto size = this->size(); + this->clear(); + out_ = copy_str(data_, data_ + this->limit(size), out_); + } + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} + iterator_buffer(iterator_buffer&& other) + : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} + ~iterator_buffer() { flush(); } + + auto out() -> OutputIt { + flush(); + return out_; + } + auto count() const -> size_t { return Traits::count() + this->size(); } +}; + +template +class iterator_buffer final + : public fixed_buffer_traits, + public buffer { + private: + T* out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() == this->capacity()) flush(); + } + + void flush() { + size_t n = this->limit(this->size()); + if (this->data() 
== out_) { + out_ += n; + this->set(data_, buffer_size); + } + this->clear(); + } + + public: + explicit iterator_buffer(T* out, size_t n = buffer_size) + : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} + iterator_buffer(iterator_buffer&& other) + : fixed_buffer_traits(other), + buffer(std::move(other)), + out_(other.out_) { + if (this->data() != out_) { + this->set(data_, buffer_size); + this->clear(); + } + } + ~iterator_buffer() { flush(); } + + auto out() -> T* { + flush(); + return out_; + } + auto count() const -> size_t { + return fixed_buffer_traits::count() + this->size(); + } +}; + +template class iterator_buffer final : public buffer { + protected: + FMT_CONSTEXPR20 void grow(size_t) override {} + + public: + explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} + + auto out() -> T* { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. +template +class iterator_buffer, + enable_if_t::value, + typename Container::value_type>> + final : public buffer { + private: + Container& container_; + + protected: + FMT_CONSTEXPR20 void grow(size_t capacity) override { + container_.resize(capacity); + this->set(&container_[0], capacity); + } + + public: + explicit iterator_buffer(Container& c) + : buffer(c.size()), container_(c) {} + explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) + : iterator_buffer(get_container(out)) {} + + auto out() -> std::back_insert_iterator { + return std::back_inserter(container_); + } +}; + +// A buffer that counts the number of code units written discarding the output. 
+template class counting_buffer final : public buffer { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + protected: + FMT_CONSTEXPR20 void grow(size_t) override { + if (this->size() != buffer_size) return; + count_ += this->size(); + this->clear(); + } + + public: + counting_buffer() : buffer(data_, 0, buffer_size) {} + + auto count() -> size_t { return count_ + this->size(); } +}; + +template +using buffer_appender = conditional_t::value, appender, + std::back_insert_iterator>>; + +// Maps an output iterator to a buffer. +template +auto get_buffer(OutputIt out) -> iterator_buffer { + return iterator_buffer(out); +} +template , Buf>::value)> +auto get_buffer(std::back_insert_iterator out) -> buffer& { + return get_container(out); +} + +template +FMT_INLINE auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { + return buf.out(); +} +template +auto get_iterator(buffer&, OutputIt out) -> OutputIt { + return out; +} + +template +struct fallback_formatter { + fallback_formatter() = delete; +}; + +// Specifies if T has an enabled fallback_formatter specialization. +template +using has_fallback_formatter = +#ifdef FMT_DEPRECATED_OSTREAM + std::is_constructible>; +#else + std::false_type; +#endif + +struct view {}; + +template struct named_arg : view { + const Char* name; + const T& value; + named_arg(const Char* n, const T& v) : name(n), value(v) {} +}; + +template struct named_arg_info { + const Char* name; + int id; +}; + +template +struct arg_data { + // args_[0].named_args points to named_args_ to avoid bloating format_args. + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; + named_arg_info named_args_[NUM_NAMED_ARGS]; + + template + arg_data(const U&... 
init) : args_{T(named_args_, NUM_NAMED_ARGS), init...} {} + arg_data(const arg_data& other) = delete; + auto args() const -> const T* { return args_ + 1; } + auto named_args() -> named_arg_info* { return named_args_; } +}; + +template +struct arg_data { + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; + + template + FMT_CONSTEXPR FMT_INLINE arg_data(const U&... init) : args_{init...} {} + FMT_CONSTEXPR FMT_INLINE auto args() const -> const T* { return args_; } + FMT_CONSTEXPR FMT_INLINE auto named_args() -> std::nullptr_t { + return nullptr; + } +}; + +template +inline void init_named_args(named_arg_info*, int, int) {} + +template struct is_named_arg : std::false_type {}; +template struct is_statically_named_arg : std::false_type {}; + +template +struct is_named_arg> : std::true_type {}; + +template ::value)> +void init_named_args(named_arg_info* named_args, int arg_count, + int named_arg_count, const T&, const Tail&... args) { + init_named_args(named_args, arg_count + 1, named_arg_count, args...); +} + +template ::value)> +void init_named_args(named_arg_info* named_args, int arg_count, + int named_arg_count, const T& arg, const Tail&... args) { + named_args[named_arg_count++] = {arg.name, arg_count}; + init_named_args(named_args, arg_count + 1, named_arg_count, args...); +} + +template +FMT_CONSTEXPR FMT_INLINE void init_named_args(std::nullptr_t, int, int, + const Args&...) {} + +template constexpr auto count() -> size_t { return B ? 1 : 0; } +template constexpr auto count() -> size_t { + return (B1 ? 
1 : 0) + count(); +} + +template constexpr auto count_named_args() -> size_t { + return count::value...>(); +} + +template +constexpr auto count_statically_named_args() -> size_t { + return count::value...>(); +} + +struct unformattable {}; +struct unformattable_char : unformattable {}; +struct unformattable_const : unformattable {}; +struct unformattable_pointer : unformattable {}; + +template struct string_value { + const Char* data; + size_t size; +}; + +template struct named_arg_value { + const named_arg_info* data; + size_t size; +}; + +template struct custom_value { + using parse_context = typename Context::parse_context_type; + void* value; + void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); +}; + +// A formatting argument value. +template class value { + public: + using char_type = typename Context::char_type; + + union { + monostate no_value; + int int_value; + unsigned uint_value; + long long long_long_value; + unsigned long long ulong_long_value; + int128_opt int128_value; + uint128_opt uint128_value; + bool bool_value; + char_type char_value; + float float_value; + double double_value; + long double long_double_value; + const void* pointer; + string_value string; + custom_value custom; + named_arg_value named_args; + }; + + constexpr FMT_INLINE value() : no_value() {} + constexpr FMT_INLINE value(int val) : int_value(val) {} + constexpr FMT_INLINE value(unsigned val) : uint_value(val) {} + constexpr FMT_INLINE value(long long val) : long_long_value(val) {} + constexpr FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} + FMT_INLINE value(int128_opt val) : int128_value(val) {} + FMT_INLINE value(uint128_opt val) : uint128_value(val) {} + constexpr FMT_INLINE value(float val) : float_value(val) {} + constexpr FMT_INLINE value(double val) : double_value(val) {} + FMT_INLINE value(long double val) : long_double_value(val) {} + constexpr FMT_INLINE value(bool val) : bool_value(val) {} + constexpr FMT_INLINE value(char_type 
val) : char_value(val) {} + FMT_CONSTEXPR FMT_INLINE value(const char_type* val) { + string.data = val; + if (is_constant_evaluated()) string.size = {}; + } + FMT_CONSTEXPR FMT_INLINE value(basic_string_view val) { + string.data = val.data(); + string.size = val.size(); + } + FMT_INLINE value(const void* val) : pointer(val) {} + FMT_INLINE value(const named_arg_info* args, size_t size) + : named_args{args, size} {} + + template FMT_CONSTEXPR FMT_INLINE value(T& val) { + using value_type = remove_cvref_t; + custom.value = const_cast(&val); + // Get the formatter type through the context to allow different contexts + // have different extension points, e.g. `formatter` for `format` and + // `printf_formatter` for `printf`. + custom.format = format_custom_arg< + value_type, + conditional_t::value, + typename Context::template formatter_type, + fallback_formatter>>; + } + value(unformattable); + value(unformattable_char); + value(unformattable_const); + value(unformattable_pointer); + + private: + // Formats an argument of a custom type, such as a user-defined class. + template + static void format_custom_arg(void* arg, + typename Context::parse_context_type& parse_ctx, + Context& ctx) { + auto f = Formatter(); + parse_ctx.advance_to(f.parse(parse_ctx)); + using qualified_type = + conditional_t(), const T, T>; + ctx.advance_to(f.format(*static_cast(arg), ctx)); + } +}; + +template +FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg; + +// To minimize the number of types we need to deal with, long is translated +// either to int or to long long depending on its size. +enum { long_short = sizeof(long) == sizeof(int) }; +using long_type = conditional_t; +using ulong_type = conditional_t; + +#ifdef __cpp_lib_byte +inline auto format_as(std::byte b) -> unsigned char { + return static_cast(b); +} +#endif + +template struct has_format_as { + template ::value&& std::is_integral::value)> + static auto check(U*) -> std::true_type; + static auto check(...) 
-> std::false_type; + + enum { value = decltype(check(static_cast(nullptr)))::value }; +}; + +// Maps formatting arguments to core types. +// arg_mapper reports errors by returning unformattable instead of using +// static_assert because it's used in the is_formattable trait. +template struct arg_mapper { + using char_type = typename Context::char_type; + + FMT_CONSTEXPR FMT_INLINE auto map(signed char val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned char val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(short val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned short val) -> unsigned { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int val) -> int { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned val) -> unsigned { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long val) -> long_type { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long val) -> ulong_type { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(long long val) -> long long { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(unsigned long long val) + -> unsigned long long { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(int128_opt val) -> int128_opt { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(uint128_opt val) -> uint128_opt { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(bool val) -> bool { return val; } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR FMT_INLINE auto map(T val) -> char_type { + return val; + } + template ::value || +#ifdef __cpp_char8_t + std::is_same::value || +#endif + std::is_same::value || + std::is_same::value) && + !std::is_same::value, + int> = 0> + FMT_CONSTEXPR FMT_INLINE auto map(T) -> unformattable_char { + return {}; + } + + FMT_CONSTEXPR FMT_INLINE auto map(float val) -> float { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(double val) -> double { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(long double val) -> long 
double { + return val; + } + + FMT_CONSTEXPR FMT_INLINE auto map(char_type* val) -> const char_type* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(const char_type* val) -> const char_type* { + return val; + } + template ::value && !std::is_pointer::value && + std::is_same>::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { + return to_string_view(val); + } + template ::value && !std::is_pointer::value && + !std::is_same>::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T&) -> unformattable_char { + return {}; + } + template >::value && + !is_string::value && !has_formatter::value && + !has_fallback_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { + return basic_string_view(val); + } + template >::value && + !std::is_convertible>::value && + !is_string::value && !has_formatter::value && + !has_fallback_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> basic_string_view { + return std_string_view(val); + } + + FMT_CONSTEXPR FMT_INLINE auto map(void* val) -> const void* { return val; } + FMT_CONSTEXPR FMT_INLINE auto map(const void* val) -> const void* { + return val; + } + FMT_CONSTEXPR FMT_INLINE auto map(std::nullptr_t val) -> const void* { + return val; + } + + // We use SFINAE instead of a const T* parameter to avoid conflicting with + // the C array overload. 
+ template < + typename T, + FMT_ENABLE_IF( + std::is_pointer::value || std::is_member_pointer::value || + std::is_function::type>::value || + (std::is_convertible::value && + !std::is_convertible::value && + !has_formatter::value))> + FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { + return {}; + } + + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T (&values)[N]) -> const T (&)[N] { + return values; + } + + template ::value&& std::is_convertible::value && + !has_format_as::value && !has_formatter::value && + !has_fallback_formatter::value)> + FMT_DEPRECATED FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> decltype(std::declval().map( + static_cast>(val))) { + return map(static_cast>(val)); + } + + template ::value && + !has_formatter::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& val) + -> decltype(std::declval().map(format_as(T()))) { + return map(format_as(val)); + } + + template > + struct formattable + : bool_constant() || + !std::is_const>::value || + has_fallback_formatter::value> {}; + +#if (FMT_MSC_VERSION != 0 && FMT_MSC_VERSION < 1910) || \ + FMT_ICC_VERSION != 0 || defined(__NVCC__) + // Workaround a bug in MSVC and Intel (Issue 2746). 
+ template FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { + return val; + } +#else + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { + return val; + } + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto do_map(T&&) -> unformattable_const { + return {}; + } +#endif + + template , + FMT_ENABLE_IF(!is_string::value && !is_char::value && + !std::is_array::value && + !std::is_pointer::value && + !has_format_as::value && + (has_formatter::value || + has_fallback_formatter::value))> + FMT_CONSTEXPR FMT_INLINE auto map(T&& val) + -> decltype(this->do_map(std::forward(val))) { + return do_map(std::forward(val)); + } + + template ::value)> + FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) + -> decltype(std::declval().map(named_arg.value)) { + return map(named_arg.value); + } + + auto map(...) -> unformattable { return {}; } +}; + +// A type constant after applying arg_mapper. +template +using mapped_type_constant = + type_constant().map(std::declval())), + typename Context::char_type>; + +enum { packed_arg_bits = 4 }; +// Maximum number of arguments with packed types. +enum { max_packed_args = 62 / packed_arg_bits }; +enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; +enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; + +FMT_END_DETAIL_NAMESPACE + +// An output iterator that appends to a buffer. +// It is used to reduce symbol sizes for the common case. +class appender : public std::back_insert_iterator> { + using base = std::back_insert_iterator>; + + public: + using std::back_insert_iterator>::back_insert_iterator; + appender(base it) noexcept : base(it) {} + FMT_UNCHECKED_ITERATOR(appender); + + auto operator++() noexcept -> appender& { return *this; } + auto operator++(int) noexcept -> appender { return *this; } +}; + +// A formatting argument. It is a trivially copyable/constructible type to +// allow storage in basic_memory_buffer. 
+template class basic_format_arg { + private: + detail::value value_; + detail::type type_; + + template + friend FMT_CONSTEXPR auto detail::make_arg(T&& value) + -> basic_format_arg; + + template + friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, + const basic_format_arg& arg) + -> decltype(vis(0)); + + friend class basic_format_args; + friend class dynamic_format_arg_store; + + using char_type = typename Context::char_type; + + template + friend struct detail::arg_data; + + basic_format_arg(const detail::named_arg_info* args, size_t size) + : value_(args, size) {} + + public: + class handle { + public: + explicit handle(detail::custom_value custom) : custom_(custom) {} + + void format(typename Context::parse_context_type& parse_ctx, + Context& ctx) const { + custom_.format(custom_.value, parse_ctx, ctx); + } + + private: + detail::custom_value custom_; + }; + + constexpr basic_format_arg() : type_(detail::type::none_type) {} + + constexpr explicit operator bool() const noexcept { + return type_ != detail::type::none_type; + } + + auto type() const -> detail::type { return type_; } + + auto is_integral() const -> bool { return detail::is_integral_type(type_); } + auto is_arithmetic() const -> bool { + return detail::is_arithmetic_type(type_); + } +}; + +/** + \rst + Visits an argument dispatching to the appropriate visit method based on + the argument type. For example, if the argument type is ``double`` then + ``vis(value)`` will be called with the value of type ``double``. 
+  \endrst
+ */
+template
+FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
+    Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) {
+  switch (arg.type_) {
+  case detail::type::none_type:
+    // Empty argument: leave the switch and visit monostate below.
+    break;
+  case detail::type::int_type:
+    return vis(arg.value_.int_value);
+  case detail::type::uint_type:
+    return vis(arg.value_.uint_value);
+  case detail::type::long_long_type:
+    return vis(arg.value_.long_long_value);
+  case detail::type::ulong_long_type:
+    return vis(arg.value_.ulong_long_value);
+  case detail::type::int128_type:
+    return vis(detail::convert_for_visit(arg.value_.int128_value));
+  case detail::type::uint128_type:
+    return vis(detail::convert_for_visit(arg.value_.uint128_value));
+  case detail::type::bool_type:
+    return vis(arg.value_.bool_value);
+  case detail::type::char_type:
+    return vis(arg.value_.char_value);
+  case detail::type::float_type:
+    return vis(arg.value_.float_value);
+  case detail::type::double_type:
+    return vis(arg.value_.double_value);
+  case detail::type::long_double_type:
+    return vis(arg.value_.long_double_value);
+  case detail::type::cstring_type:
+    // C strings are visited as the raw pointer, without a size.
+    return vis(arg.value_.string.data);
+  case detail::type::string_type:
+    // Sized strings are visited as a string view built from pointer + size.
+    using sv = basic_string_view;
+    return vis(sv(arg.value_.string.data, arg.value_.string.size));
+  case detail::type::pointer_type:
+    return vis(arg.value_.pointer);
+  case detail::type::custom_type:
+    // Custom values are visited wrapped in a basic_format_arg::handle.
+    return vis(typename basic_format_arg::handle(arg.value_.custom));
+  }
+  return vis(monostate());
+}
+
+FMT_BEGIN_DETAIL_NAMESPACE
+
+// Appends the range [begin, end) to the container underlying `out` with a
+// single append call and returns `out`.
+template
+auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
+  get_container(out).append(begin, end);
+  return out;
+}
+
+// Range overload: copies rng.begin()..rng.end() to `out`.
+template
+FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
+  return detail::copy_str(rng.begin(), rng.end(), out);
+}
+
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
+// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
+template struct void_t_impl { using type = void; };
+template
+using void_t = typename detail::void_t_impl::type;
+#else
+template using void_t = void;
+#endif
+
+// Trait: true when the type exposes an iterator_category and supports
+// `*it = value` assignment; false otherwise.
+template
+struct is_output_iterator : std::false_type {};
+
+template
+struct is_output_iterator<
+    It, T,
+    void_t::iterator_category,
+           decltype(*std::declval() = std::declval())>>
+    : std::true_type {};
+
+template
+struct is_back_insert_iterator : std::false_type {};
+template
+struct is_back_insert_iterator>
+    : std::true_type {};
+
+template
+struct is_contiguous_back_insert_iterator : std::false_type {};
+template
+struct is_contiguous_back_insert_iterator>
+    : is_contiguous {};
+template <>
+struct is_contiguous_back_insert_iterator : std::true_type {};
+
+// A type-erased reference to an std::locale to avoid a heavy include.
+class locale_ref {
+ private:
+  const void* locale_;  // A type-erased pointer to std::locale.
+
+ public:
+  // A default-constructed locale_ref refers to no locale and tests false.
+  constexpr FMT_INLINE locale_ref() : locale_(nullptr) {}
+  template explicit locale_ref(const Locale& loc);
+
+  explicit operator bool() const noexcept { return locale_ != nullptr; }
+
+  template auto get() const -> Locale;
+};
+
+// Base case of the recursion below: no arguments encode to 0.
+template constexpr auto encode_types() -> unsigned long long {
+  return 0;
+}
+
+// Packs the mapped type constant of each argument into consecutive
+// packed_arg_bits-wide fields, with the first argument in the lowest bits.
+template
+constexpr auto encode_types() -> unsigned long long {
+  return static_cast(mapped_type_constant::value) |
+         (encode_types() << packed_arg_bits);
+}
+
+// Maps `val` through arg_mapper and wraps the result in a value. Arguments
+// that map to one of the "unformattable" marker types are rejected at
+// compile time by the static_asserts below.
+template
+FMT_CONSTEXPR FMT_INLINE auto make_value(T&& val) -> value {
+  const auto& arg = arg_mapper().map(FMT_FORWARD(val));
+
+  constexpr bool formattable_char =
+      !std::is_same::value;
+  static_assert(formattable_char, "Mixing character types is disallowed.");
+
+  constexpr bool formattable_const =
+      !std::is_same::value;
+  static_assert(formattable_const, "Cannot format a const argument.");
+
+  // Formatting of arbitrary pointers is disallowed. If you want to output
+  // a pointer cast it to "void *" or "const void *". In particular, this
+  // forbids formatting of "[const] volatile char *" which is printed as bool
+  // by iostreams.
+  constexpr bool formattable_pointer =
+      !std::is_same::value;
+  static_assert(formattable_pointer,
+                "Formatting of non-void pointers is disallowed.");
+
+  constexpr bool formattable =
+      !std::is_same::value;
+  static_assert(
+      formattable,
+      "Cannot format an argument. To make type T formattable provide a "
+      "formatter specialization: https://fmt.dev/latest/api.html#udt");
+  return {arg};
+}
+
+// Builds a basic_format_arg that carries both the mapped type tag and the
+// value produced by make_value.
+template
+FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg {
+  basic_format_arg arg;
+  arg.type_ = mapped_type_constant::value;
+  arg.value_ = make_value(value);
+  return arg;
+}
+
+// The type template parameter is there to avoid an ODR violation when using
+// a fallback formatter in one translation unit and an implicit conversion in
+// another (not recommended).
+template
+FMT_CONSTEXPR FMT_INLINE auto make_arg(T&& val) -> value {
+  return make_value(val);
+}
+
+template
+FMT_CONSTEXPR inline auto make_arg(T&& value) -> basic_format_arg {
+  return make_arg(value);
+}
+FMT_END_DETAIL_NAMESPACE
+
+// Formatting context.
+template class basic_format_context {
+ public:
+  /** The character type for the output. */
+  using char_type = Char;
+
+ private:
+  OutputIt out_;
+  basic_format_args args_;
+  detail::locale_ref loc_;
+
+ public:
+  using iterator = OutputIt;
+  using format_arg = basic_format_arg;
+  using parse_context_type = basic_format_parse_context;
+  template using formatter_type = formatter;
+
+  // Movable but neither copy-constructible nor copy-assignable.
+  basic_format_context(basic_format_context&&) = default;
+  basic_format_context(const basic_format_context&) = delete;
+  void operator=(const basic_format_context&) = delete;
+  /**
+   Constructs a ``basic_format_context`` object. References to the arguments are
+   stored in the object so make sure they have appropriate lifetimes.
+   */
+  constexpr basic_format_context(
+      OutputIt out, basic_format_args ctx_args,
+      detail::locale_ref loc = detail::locale_ref())
+      : out_(out), args_(ctx_args), loc_(loc) {}
+
+  // Argument lookup by position or by name, delegated to the stored args_.
+  constexpr auto arg(int id) const -> format_arg { return args_.get(id); }
+  FMT_CONSTEXPR auto arg(basic_string_view name) -> format_arg {
+    return args_.get(name);
+  }
+  FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int {
+    return args_.get_id(name);
+  }
+  auto args() const -> const basic_format_args& {
+    return args_;
+  }
+
+  // Errors are reported through a freshly constructed detail::error_handler.
+  FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; }
+  void on_error(const char* message) { error_handler().on_error(message); }
+
+  // Returns an iterator to the beginning of the output range.
+  FMT_CONSTEXPR auto out() -> iterator { return out_; }
+
+  // Advances the begin iterator to ``it``.
+  // The assignment is skipped when the iterator is a back-insert iterator.
+  void advance_to(iterator it) {
+    if (!detail::is_back_insert_iterator()) out_ = it;
+  }
+
+  FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; }
+};
+
+template
+using buffer_context =
+    basic_format_context, Char>;
+using format_context = buffer_context;
+
+// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164.
+#define FMT_BUFFER_CONTEXT(Char) \
+  basic_format_context, Char>
+
+// Trait: true when arg_mapper does not map the type to a marker derived from
+// detail::unformattable and the type has no fallback formatter.
+template
+using is_formattable = bool_constant<
+    !std::is_base_of>().map(
+                         std::declval()))>::value &&
+    !detail::has_fallback_formatter::value>;
+
+/**
+  \rst
+  An array of references to arguments. It can be implicitly converted into
+  `~fmt::basic_format_args` for passing into type-erased formatting functions
+  such as `~fmt::vformat`.
+  \endrst
+ */
+template
+class format_arg_store
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
+    // Workaround a GCC template argument substitution bug.
+    : public basic_format_args
+#endif
+{
+ private:
+  static const size_t num_args = sizeof...(Args);
+  static const size_t num_named_args = detail::count_named_args();
+  // Packed storage is used while the argument count fits in max_packed_args.
+  static const bool is_packed = num_args <= detail::max_packed_args;
+
+  using value_type = conditional_t,
+                                   basic_format_arg>;
+
+  detail::arg_data
+      data_;
+
+  friend class basic_format_args;
+
+  // Descriptor handed to basic_format_args: the packed type codes when
+  // is_packed, otherwise is_unpacked_bit | num_args. has_named_args_bit is
+  // OR-ed in when at least one named argument is present.
+  static constexpr unsigned long long desc =
+      (is_packed ? detail::encode_types()
+                 : detail::is_unpacked_bit | num_args) |
+      (num_named_args != 0
+           ? static_cast(detail::has_named_args_bit)
+           : 0);
+
+ public:
+  // Converts every argument with detail::make_arg into data_, then records
+  // named-argument information via detail::init_named_args.
+  template
+  FMT_CONSTEXPR FMT_INLINE format_arg_store(T&&... args)
+      :
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409
+        basic_format_args(*this),
+#endif
+        data_{detail::make_arg<
+            is_packed, Context,
+            detail::mapped_type_constant, Context>::value>(
+            FMT_FORWARD(args))...} {
+    detail::init_named_args(data_.named_args(), 0, 0, args...);
+  }
+};
+
+/**
+  \rst
+  Constructs a `~fmt::format_arg_store` object that contains references to
+  arguments and can be implicitly converted to `~fmt::format_args`. `Context`
+  can be omitted in which case it defaults to `~fmt::context`.
+  See `~fmt::arg` for lifetime considerations.
+  \endrst
+ */
+template
+constexpr auto make_format_args(Args&&... args)
+    -> format_arg_store...> {
+  return {FMT_FORWARD(args)...};
+}
+
+/**
+  \rst
+  Returns a named argument to be used in a formatting function.
+  It should only be used in a call to a formatting function or
+  `dynamic_format_arg_store::push_back`.
+
+  **Example**::
+
+    fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23));
+  \endrst
+ */
+template
+inline auto arg(const Char* name, const T& arg) -> detail::named_arg {
+  static_assert(!detail::is_named_arg(), "nested named arguments");
+  return {name, arg};
+}
+
+/**
+  \rst
+  A view of a collection of formatting arguments.
To avoid lifetime issues it
+  should only be used as a parameter type in type-erased functions such as
+  ``vformat``::
+
+    void vlog(string_view format_str, format_args args);  // OK
+    format_args args = make_format_args(42);  // Error: dangling reference
+  \endrst
+ */
+template class basic_format_args {
+ public:
+  using size_type = int;
+  using format_arg = basic_format_arg;
+
+ private:
+  // A descriptor that contains information about formatting arguments.
+  // If the number of arguments is less than or equal to max_packed_args then
+  // argument types are passed in the descriptor. This reduces binary code size
+  // per formatting function call.
+  unsigned long long desc_;
+  union {
+    // If is_packed() returns true then argument values are stored in values_;
+    // otherwise they are stored in args_. This is done to improve cache
+    // locality and reduce compiled code size since storing larger objects
+    // may require more code (at least on x86-64) even if the same amount of
+    // data is actually copied to stack. It saves ~10% on the bloat test.
+    const detail::value* values_;
+    const format_arg* args_;
+  };
+
+  constexpr auto is_packed() const -> bool {
+    return (desc_ & detail::is_unpacked_bit) == 0;
+  }
+  auto has_named_args() const -> bool {
+    return (desc_ & detail::has_named_args_bit) != 0;
+  }
+
+  // Extracts the packed_arg_bits-wide type code of the argument at `index`
+  // from the descriptor.
+  FMT_CONSTEXPR auto type(int index) const -> detail::type {
+    int shift = index * detail::packed_arg_bits;
+    unsigned int mask = (1 << detail::packed_arg_bits) - 1;
+    return static_cast((desc_ >> shift) & mask);
+  }
+
+  // Private constructors taking an explicit descriptor: one for packed
+  // storage (values) and one for unpacked storage (full format_arg objects).
+  constexpr FMT_INLINE basic_format_args(unsigned long long desc,
+                                         const detail::value* values)
+      : desc_(desc), values_(values) {}
+  constexpr basic_format_args(unsigned long long desc, const format_arg* args)
+      : desc_(desc), args_(args) {}
+
+ public:
+  // An empty argument list: zero descriptor and a null storage pointer.
+  constexpr basic_format_args() : desc_(0), args_(nullptr) {}
+
+  /**
+   \rst
+   Constructs a `basic_format_args` object from `~fmt::format_arg_store`.
+   \endrst
+   */
+  template
+  constexpr FMT_INLINE basic_format_args(
+      const format_arg_store& store)
+      : basic_format_args(format_arg_store::desc,
+                          store.data_.args()) {}
+
+  /**
+   \rst
+   Constructs a `basic_format_args` object from
+   `~fmt::dynamic_format_arg_store`.
+   \endrst
+   */
+  constexpr FMT_INLINE basic_format_args(
+      const dynamic_format_arg_store& store)
+      : basic_format_args(store.get_types(), store.data()) {}
+
+  /**
+   \rst
+   Constructs a `basic_format_args` object from a dynamic set of arguments.
+   \endrst
+   */
+  constexpr basic_format_args(const format_arg* args, int count)
+      : basic_format_args(detail::is_unpacked_bit | detail::to_unsigned(count),
+                          args) {}
+
+  /** Returns the argument with the specified id. */
+  // An empty (none_type) argument is returned when the id is out of range.
+  // NOTE(review): a negative id is not rejected on either path; callers are
+  // presumably expected to pass non-negative ids only - confirm.
+  FMT_CONSTEXPR auto get(int id) const -> format_arg {
+    format_arg arg;
+    if (!is_packed()) {
+      // Unpacked: bounds-check against the count stored in the descriptor.
+      if (id < max_size()) arg = args_[id];
+      return arg;
+    }
+    // Packed: the type comes from the descriptor, the value from values_.
+    if (id >= detail::max_packed_args) return arg;
+    arg.type_ = type(id);
+    if (arg.type_ == detail::type::none_type) return arg;
+    arg.value_ = values_[id];
+    return arg;
+  }
+
+  // Returns the argument with the given name, or an empty argument if there
+  // is no such name.
+  template
+  auto get(basic_string_view name) const -> format_arg {
+    int id = get_id(name);
+    return id >= 0 ? get(id) : format_arg();
+  }
+
+  // Returns the id recorded for the named argument `name`, or -1 if absent.
+  template
+  auto get_id(basic_string_view name) const -> int {
+    if (!has_named_args()) return -1;
+    // Named-argument info is read from the slot just before the first
+    // argument (index -1) of whichever storage is active.
+    const auto& named_args =
+        (is_packed() ? values_[-1] : args_[-1].value_).named_args;
+    for (size_t i = 0; i < named_args.size; ++i) {
+      if (named_args.data[i].name == name) return named_args.data[i].id;
+    }
+    return -1;
+  }
+
+  // Packed: max_packed_args. Unpacked: the descriptor with is_unpacked_bit
+  // cleared, converted to the return type.
+  auto max_size() const -> int {
+    unsigned long long max_packed = detail::max_packed_args;
+    return static_cast(is_packed() ? max_packed
+                                        : desc_ & ~detail::is_unpacked_bit);
+  }
+};
+
+/** An alias to ``basic_format_args``. */
+// A separate type would result in shorter symbols but break ABI compatibility
+// between clang and gcc on ARM (#1919).
+using format_args = basic_format_args;
+
+// We cannot use enum classes as bit fields because of a gcc bug, so we put them
+// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414).
+// Additionally, if an underlying type is specified, older gcc incorrectly warns
+// that the type is too small. Both bugs are fixed in gcc 9.3.
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903
+#  define FMT_ENUM_UNDERLYING_TYPE(type)
+#else
+#  define FMT_ENUM_UNDERLYING_TYPE(type) : type
+#endif
+namespace align {
+enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center,
+                                                  numeric};
+}
+using align_t = align::type;
+namespace sign {
+enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space};
+}
+using sign_t = sign::type;
+
+FMT_BEGIN_DETAIL_NAMESPACE
+
+// Workaround an array initialization issue in gcc 4.8.
+// Holds a fill of up to max_size code units; the default is a single space.
+template struct fill_t {
+ private:
+  enum { max_size = 4 };
+  Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)};
+  unsigned char size_ = 1;
+
+ public:
+  // Replaces the fill with `s`. Fills longer than max_size are reported via
+  // throw_format_error and leave the current fill untouched.
+  FMT_CONSTEXPR void operator=(basic_string_view s) {
+    auto size = s.size();
+    if (size > max_size) return throw_format_error("invalid fill");
+    for (size_t i = 0; i < size; ++i) data_[i] = s[i];
+    size_ = static_cast(size);
+  }
+
+  constexpr auto size() const -> size_t { return size_; }
+  constexpr auto data() const -> const Char* { return data_; }
+
+  // Unchecked element access into the fill buffer.
+  FMT_CONSTEXPR auto operator[](size_t index) -> Char& { return data_[index]; }
+  FMT_CONSTEXPR auto operator[](size_t index) const -> const Char& {
+    return data_[index];
+  }
+};
+FMT_END_DETAIL_NAMESPACE
+
+// Presentation types selected by the type character of a format spec. The
+// enumerator order matters: check_int_type_spec compares against bin_upper.
+enum class presentation_type : unsigned char {
+  none,
+  // Integer types should go first,
+  dec,             // 'd'
+  oct,             // 'o'
+  hex_lower,       // 'x'
+  hex_upper,       // 'X'
+  bin_lower,       // 'b'
+  bin_upper,       // 'B'
+  hexfloat_lower,  // 'a'
+  hexfloat_upper,  // 'A'
+  exp_lower,       // 'e'
+  exp_upper,       // 'E'
+  fixed_lower,     // 'f'
+  fixed_upper,     // 'F'
+  general_lower,   // 'g'
+  general_upper,   // 'G'
+  chr,             // 'c'
+  string,          // 's'
+  pointer,         // 'p'
+  debug            // '?'
+};
+
+// Format specifiers for built-in and string types.
+template struct basic_format_specs {
+  int width;
+  int precision;
+  presentation_type type;
+  align_t align : 4;
+  sign_t sign : 3;
+  bool alt : 1;  // Alternate form ('#').
+  bool localized : 1;
+  detail::fill_t fill;
+
+  // Defaults: width 0, precision -1, no type/alignment/sign, flags cleared.
+  // NOTE(review): precision -1 presumably means "not specified" - confirm.
+  constexpr basic_format_specs()
+      : width(0),
+        precision(-1),
+        type(presentation_type::none),
+        align(align::none),
+        sign(sign::none),
+        alt(false),
+        localized(false) {}
+};
+
+using format_specs = basic_format_specs;
+
+FMT_BEGIN_DETAIL_NAMESPACE
+
+enum class arg_id_kind { none, index, name };
+
+// An argument reference.
+// `kind` records whether `val` holds an index, a name, or nothing.
+template struct arg_ref {
+  FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {}
+
+  FMT_CONSTEXPR explicit arg_ref(int index)
+      : kind(arg_id_kind::index), val(index) {}
+  FMT_CONSTEXPR explicit arg_ref(basic_string_view name)
+      : kind(arg_id_kind::name), val(name) {}
+
+  // Re-targets the reference at a positional argument.
+  FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& {
+    kind = arg_id_kind::index;
+    val.index = idx;
+    return *this;
+  }
+
+  arg_id_kind kind;
+  union value {
+    FMT_CONSTEXPR value(int id = 0) : index{id} {}
+    FMT_CONSTEXPR value(basic_string_view n) : name(n) {}
+
+    int index;
+    basic_string_view name;
+  } val;
+};
+
+// Format specifiers with width and precision resolved at formatting rather
+// than parsing time to allow re-using the same parsed specifiers with
+// different sets of arguments (precompilation of format strings).
+template
+struct dynamic_format_specs : basic_format_specs {
+  arg_ref width_ref;
+  arg_ref precision_ref;
+};
+
+// Empty tag type; the parsing adapters pass auto_id() when a dynamic width or
+// precision is written without an explicit argument id.
+struct auto_id {};
+
+// A format specifier handler that sets fields in basic_format_specs.
+template class specs_setter {
+ protected:
+  // The specs object that every on_* callback writes into.
+  basic_format_specs& specs_;
+
+ public:
+  explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs)
+      : specs_(specs) {}
+
+  // A copy refers to the same specs object as the original.
+  FMT_CONSTEXPR specs_setter(const specs_setter& other)
+      : specs_(other.specs_) {}
+
+  FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; }
+  FMT_CONSTEXPR void on_fill(basic_string_view fill) {
+    specs_.fill = fill;
+  }
+  FMT_CONSTEXPR void on_sign(sign_t s) { specs_.sign = s; }
+  FMT_CONSTEXPR void on_hash() { specs_.alt = true; }
+  FMT_CONSTEXPR void on_localized() { specs_.localized = true; }
+
+  // '0' flag: use numeric alignment unless an alignment was already set, and
+  // make '0' the first fill character.
+  FMT_CONSTEXPR void on_zero() {
+    if (specs_.align == align::none) specs_.align = align::numeric;
+    specs_.fill[0] = Char('0');
+  }
+
+  FMT_CONSTEXPR void on_width(int width) { specs_.width = width; }
+  FMT_CONSTEXPR void on_precision(int precision) {
+    specs_.precision = precision;
+  }
+  // Called by parse_precision once a precision has been parsed; a no-op here.
+  FMT_CONSTEXPR void end_precision() {}
+
+  FMT_CONSTEXPR void on_type(presentation_type type) { specs_.type = type; }
+};
+
+// Format spec handler that saves references to arguments representing dynamic
+// width and precision to be resolved at formatting time.
+template
+class dynamic_specs_handler
+    : public specs_setter {
+ public:
+  using char_type = typename ParseContext::char_type;
+
+  // `specs` receives the parsed fields; `ctx` is used to validate argument
+  // ids and to report errors. Both are held by reference.
+  FMT_CONSTEXPR dynamic_specs_handler(dynamic_format_specs& specs,
+                                      ParseContext& ctx)
+      : specs_setter(specs), specs_(specs), context_(ctx) {}
+
+  // A copy shares the specs object and the parse context of the original.
+  FMT_CONSTEXPR dynamic_specs_handler(const dynamic_specs_handler& other)
+      : specs_setter(other),
+        specs_(other.specs_),
+        context_(other.context_) {}
+
+  // Records a reference to the argument that supplies the width. Id may be
+  // an int, auto_id, or a name (see the make_arg_ref overloads below).
+  template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) {
+    specs_.width_ref = make_arg_ref(arg_id);
+  }
+
+  // Records a reference to the argument that supplies the precision.
+  template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) {
+    specs_.precision_ref = make_arg_ref(arg_id);
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    context_.on_error(message);
+  }
+
+ private:
+  dynamic_format_specs& specs_;
+  ParseContext& context_;
+
+  using arg_ref_type = arg_ref;
+
+  // Explicit index: validated by the parse context before being stored.
+  FMT_CONSTEXPR auto make_arg_ref(int arg_id) -> arg_ref_type {
+    context_.check_arg_id(arg_id);
+    context_.check_dynamic_spec(arg_id);
+    return arg_ref_type(arg_id);
+  }
+
+  // Automatic index: takes the next argument id from the parse context.
+  FMT_CONSTEXPR auto make_arg_ref(auto_id) -> arg_ref_type {
+    int arg_id = context_.next_arg_id();
+    context_.check_dynamic_spec(arg_id);
+    return arg_ref_type(arg_id);
+  }
+
+  // Named argument: the name is checked by the parse context and stored
+  // as-is. (An unused local view of the whole format string that used to be
+  // built here has been removed; it had no effect on the result.)
+  FMT_CONSTEXPR auto make_arg_ref(basic_string_view arg_id)
+      -> arg_ref_type {
+    context_.check_arg_id(arg_id);
+    return arg_ref_type(arg_id);
+  }
+};
+
+// True for 'a'-'z' and 'A'-'Z' only.
+template constexpr bool is_ascii_letter(Char c) {
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+// Converts a character to ASCII. Returns a number > 127 on conversion failure.
+template ::value)>
+constexpr auto to_ascii(Char c) -> Char {
+  return c;
+}
+template ::value)>
+constexpr auto to_ascii(Char c) -> underlying_t {
+  return c;
+}
+
+// Looks up a length in a 32-entry table (31 explicit bytes plus the literal's
+// terminating NUL) indexed by c >> 3: entries 0-15 are 1, entries 16-23 are 0,
+// entries 24-27 are 2, entries 28-29 are 3, entry 30 is 4 and entry 31 is 0.
+FMT_CONSTEXPR inline auto code_point_length_impl(char c) -> int {
+  return "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4"
+      [static_cast(c) >> 3];
+}
+
+// Returns the length in code units of the code point starting at *begin.
+// Always 1 when Char is not one byte wide.
+template
+FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int {
+  if (const_check(sizeof(Char) != 1)) return 1;
+  int len = code_point_length_impl(static_cast(*begin));
+
+  // The table yields 0 for bytes it does not treat as the start of a
+  // sequence; `len + !len` turns that 0 into 1 and leaves every other length
+  // unchanged, so the result is never 0.
+  return len + !len;
+}
+
+// Return the result via the out param to workaround gcc bug 77539.
+// Generic version: linear scan; `out` ends up equal to `last` when `value`
+// is not found.
+template
+FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool {
+  for (out = first; out != last; ++out) {
+    if (*out == value) return true;
+  }
+  return false;
+}
+
+// Specialization backed by std::memchr. Unlike the generic version, `out` is
+// left null (not `last`) when `value` is not found.
+template <>
+inline auto find(const char* first, const char* last, char value,
+                 const char*& out) -> bool {
+  out = static_cast(
+      std::memchr(first, value, to_unsigned(last - first)));
+  return out != nullptr;
+}
+
+// Parses the range [begin, end) as an unsigned integer. This function assumes
+// that the range is non-empty and the first character is a digit.
+// On return `begin` points past the last digit consumed. `error_value` is
+// returned when the number does not fit.
+template
+FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end,
+                                         int error_value) noexcept -> int {
+  FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', "");
+  unsigned value = 0, prev = 0;
+  auto p = begin;
+  do {
+    // prev keeps the value from before the most recent digit was appended.
+    prev = value;
+    value = value * 10 + unsigned(*p - '0');
+    ++p;
+  } while (p != end && '0' <= *p && *p <= '9');
+  auto num_digits = p - begin;
+  begin = p;
+  // Up to digits10 digits always fit, so no overflow check is needed.
+  if (num_digits <= std::numeric_limits::digits10)
+    return static_cast(value);
+  // Check for overflow.
+  // With exactly digits10 + 1 digits the number fits only if prev * 10 plus
+  // the last digit, recomputed in a wider type (10ull), does not exceed max.
+  // Anything longer is reported as error_value.
+  const unsigned max = to_unsigned((std::numeric_limits::max)());
+  return num_digits == std::numeric_limits::digits10 + 1 &&
+                 prev * 10ull + unsigned(p[-1] - '0') <= max
+             ? static_cast(value)
+             : error_value;
+}
+
+// Parses fill and alignment.
+// Returns the position after what was consumed, or `begin` unchanged when no
+// alignment is present.
+template
+FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end,
+                               Handler&& handler) -> const Char* {
+  FMT_ASSERT(begin != end, "");
+  auto align = align::none;
+  // First try "<fill><align>": p is the character after one code point of
+  // fill. If that would run past the end, start with p == begin (no fill).
+  auto p = begin + code_point_length(begin);
+  if (end - p <= 0) p = begin;
+  for (;;) {
+    switch (to_ascii(*p)) {
+    case '<':
+      align = align::left;
+      break;
+    case '>':
+      align = align::right;
+      break;
+    case '^':
+      align = align::center;
+      break;
+    default:
+      break;
+    }
+    if (align != align::none) {
+      if (p != begin) {
+        // An alignment character preceded by a fill.
+        auto c = *begin;
+        if (c == '{')
+          return handler.on_error("invalid fill character '{'"), begin;
+        if (c == '}') return begin;
+        handler.on_fill(basic_string_view(begin, to_unsigned(p - begin)));
+        begin = p + 1;
+      } else
+        ++begin;
+      handler.on_align(align);
+      break;
+    } else if (p == begin) {
+      // Already retried without a fill: there is no alignment.
+      break;
+    }
+    // No alignment after the fill candidate; retry at the first character.
+    p = begin;
+  }
+  return begin;
+}
+
+// True for characters that may start an argument name: ASCII letters and '_'.
+template FMT_CONSTEXPR bool is_name_start(Char c) {
+  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c;
+}
+
+// Parses a non-empty argument id: a non-negative index or a name. Calls
+// handler(index) or handler(name) on success and handler.on_error otherwise.
+template
+FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end,
+                                   IDHandler&& handler) -> const Char* {
+  FMT_ASSERT(begin != end, "");
+  Char c = *begin;
+  if (c >= '0' && c <= '9') {
+    int index = 0;
+    // A leading '0' is consumed on its own as index 0.
+    if (c != '0')
+      index =
+          parse_nonnegative_int(begin, end, (std::numeric_limits::max)());
+    else
+      ++begin;
+    // A numeric id must be followed by '}' or ':'.
+    if (begin == end || (*begin != '}' && *begin != ':'))
+      handler.on_error("invalid format string");
+    else
+      handler(index);
+    return begin;
+  }
+  if (!is_name_start(c)) {
+    handler.on_error("invalid format string");
+    return begin;
+  }
+  // Name: a name-start character followed by name-start characters or digits.
+  auto it = begin;
+  do {
+    ++it;
+  } while (it != end && (is_name_start(c = *it) || ('0' <= c && c <= '9')));
+  handler(basic_string_view(begin, to_unsigned(it - begin)));
+  return it;
+}
+
+// Parses an optional argument id. When the next character is '}' or ':' the
+// id is empty and handler() is called with no arguments. *begin is read
+// unconditionally, so callers must ensure begin != end.
+template
+FMT_CONSTEXPR FMT_INLINE auto parse_arg_id(const Char* begin, const Char* end,
+                                           IDHandler&& handler) -> const Char* {
+  Char c = *begin;
+  if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler);
+  handler();
+  return begin;
+}
+
+// Parses a width: either a literal number or a "{...}" reference to an
+// argument. Returns `begin` unchanged when neither form is present.
+template
+FMT_CONSTEXPR auto parse_width(const Char* begin, const Char* end,
+                               Handler&& handler) -> const Char* {
+  using detail::auto_id;
+  // Adapts parse_arg_id's callbacks to the handler's on_dynamic_width.
+  struct width_adapter {
+    Handler& handler;
+
+    FMT_CONSTEXPR void operator()() { handler.on_dynamic_width(auto_id()); }
+    FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_width(id); }
+    FMT_CONSTEXPR void operator()(basic_string_view id) {
+      handler.on_dynamic_width(id);
+    }
+    FMT_CONSTEXPR void on_error(const char* message) {
+      if (message) handler.on_error(message);
+    }
+  };
+
+  FMT_ASSERT(begin != end, "");
+  if ('0' <= *begin && *begin <= '9') {
+    // -1 is the overflow marker returned by parse_nonnegative_int here.
+    int width = parse_nonnegative_int(begin, end, -1);
+    if (width != -1)
+      handler.on_width(width);
+    else
+      handler.on_error("number is too big");
+  } else if (*begin == '{') {
+    ++begin;
+    if (begin != end) begin = parse_arg_id(begin, end, width_adapter{handler});
+    if (begin == end || *begin != '}')
+      return handler.on_error("invalid format string"), begin;
+    ++begin;
+  }
+  return begin;
+}
+
+// Parses a precision: a literal number or a "{...}" argument reference.
+// The first character at `begin` is skipped without being inspected.
+template
+FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end,
+                                   Handler&& handler) -> const Char* {
+  using detail::auto_id;
+  // Adapts parse_arg_id's callbacks to the handler's on_dynamic_precision.
+  struct precision_adapter {
+    Handler& handler;
+
+    FMT_CONSTEXPR void operator()() { handler.on_dynamic_precision(auto_id()); }
+    FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_precision(id); }
+    FMT_CONSTEXPR void operator()(basic_string_view id) {
+      handler.on_dynamic_precision(id);
+    }
+    FMT_CONSTEXPR void on_error(const char* message) {
+      if (message) handler.on_error(message);
+    }
+  };
+
+  ++begin;
+  // A default-constructed Char stands in for "end of input".
+  auto c = begin != end ? *begin : Char();
+  if ('0' <= c && c <= '9') {
+    auto precision = parse_nonnegative_int(begin, end, -1);
+    if (precision != -1)
+      handler.on_precision(precision);
+    else
+      handler.on_error("number is too big");
+  } else if (c == '{') {
+    ++begin;
+    if (begin != end)
+      begin = parse_arg_id(begin, end, precision_adapter{handler});
+    if (begin == end || *begin++ != '}')
+      return handler.on_error("invalid format string"), begin;
+  } else {
+    return handler.on_error("missing precision specifier"), begin;
+  }
+  handler.end_precision();
+  return begin;
+}
+
+// Maps a type character to its presentation_type; characters that are not
+// listed map to presentation_type::none.
+template
+FMT_CONSTEXPR auto parse_presentation_type(Char type) -> presentation_type {
+  switch (to_ascii(type)) {
+  case 'd':
+    return presentation_type::dec;
+  case 'o':
+    return presentation_type::oct;
+  case 'x':
+    return presentation_type::hex_lower;
+  case 'X':
+    return presentation_type::hex_upper;
+  case 'b':
+    return presentation_type::bin_lower;
+  case 'B':
+    return presentation_type::bin_upper;
+  case 'a':
+    return presentation_type::hexfloat_lower;
+  case 'A':
+    return presentation_type::hexfloat_upper;
+  case 'e':
+    return presentation_type::exp_lower;
+  case 'E':
+    return presentation_type::exp_upper;
+  case 'f':
+    return presentation_type::fixed_lower;
+  case 'F':
+    return presentation_type::fixed_upper;
+  case 'g':
+    return presentation_type::general_lower;
+  case 'G':
+    return presentation_type::general_upper;
+  case 'c':
+    return presentation_type::chr;
+  case 's':
+    return presentation_type::string;
+  case 'p':
+    return presentation_type::pointer;
+  case '?':
+    return presentation_type::debug;
+  default:
+    return presentation_type::none;
+  }
+}
+
+// Parses standard format specifiers and sends notifications about parsed
+// components to handler.
+template
+FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(const Char* begin,
+                                                 const Char* end,
+                                                 SpecHandler&& handler)
+    -> const Char* {
+  // Fast path: a single ASCII letter immediately followed by '}' is treated
+  // as a type specifier. 'L' is excluded because it is the locale flag
+  // handled further down.
+  if (1 < end - begin && begin[1] == '}' && is_ascii_letter(*begin) &&
+      *begin != 'L') {
+    presentation_type type = parse_presentation_type(*begin++);
+    if (type == presentation_type::none)
+      handler.on_error("invalid type specifier");
+    handler.on_type(type);
+    return begin;
+  }
+
+  if (begin == end) return begin;
+
+  // General path. Components are parsed in order: fill/align, sign, '#',
+  // '0', width, precision, 'L', type.
+  begin = parse_align(begin, end, handler);
+  if (begin == end) return begin;
+
+  // Parse sign.
+  switch (to_ascii(*begin)) {
+  case '+':
+    handler.on_sign(sign::plus);
+    ++begin;
+    break;
+  case '-':
+    handler.on_sign(sign::minus);
+    ++begin;
+    break;
+  case ' ':
+    handler.on_sign(sign::space);
+    ++begin;
+    break;
+  default:
+    break;
+  }
+  if (begin == end) return begin;
+
+  if (*begin == '#') {
+    handler.on_hash();
+    if (++begin == end) return begin;
+  }
+
+  // Parse zero flag.
+  if (*begin == '0') {
+    handler.on_zero();
+    if (++begin == end) return begin;
+  }
+
+  begin = parse_width(begin, end, handler);
+  if (begin == end) return begin;
+
+  // Parse precision.
+  if (*begin == '.') {
+    begin = parse_precision(begin, end, handler);
+    if (begin == end) return begin;
+  }
+
+  if (*begin == 'L') {
+    handler.on_localized();
+    ++begin;
+  }
+
+  // Parse type.
+  if (begin != end && *begin != '}') {
+    presentation_type type = parse_presentation_type(*begin++);
+    if (type == presentation_type::none)
+      handler.on_error("invalid type specifier");
+    handler.on_type(type);
+  }
+  return begin;
+}
+
+// Parses one replacement field. The character at `begin` is skipped first
+// (callers in this file pass a pointer to '{'). Returns the position after
+// the field, or `end` after reporting an error.
+template
+FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end,
+                                           Handler&& handler) -> const Char* {
+  // Adapts parse_arg_id's callbacks to handler.on_arg_id and remembers the
+  // id that the handler returned.
+  struct id_adapter {
+    Handler& handler;
+    int arg_id;
+
+    FMT_CONSTEXPR void operator()() { arg_id = handler.on_arg_id(); }
+    FMT_CONSTEXPR void operator()(int id) { arg_id = handler.on_arg_id(id); }
+    FMT_CONSTEXPR void operator()(basic_string_view id) {
+      arg_id = handler.on_arg_id(id);
+    }
+    FMT_CONSTEXPR void on_error(const char* message) {
+      if (message) handler.on_error(message);
+    }
+  };
+
+  ++begin;
+  if (begin == end) return handler.on_error("invalid format string"), end;
+  if (*begin == '}') {
+    // "{}": argument id chosen by the handler, no format specs.
+    handler.on_replacement_field(handler.on_arg_id(), begin);
+  } else if (*begin == '{') {
+    // "{{": emitted as a single '{' of literal text.
+    handler.on_text(begin, begin + 1);
+  } else {
+    auto adapter = id_adapter{handler, 0};
+    begin = parse_arg_id(begin, end, adapter);
+    Char c = begin != end ? *begin : Char();
+    if (c == '}') {
+      handler.on_replacement_field(adapter.arg_id, begin);
+    } else if (c == ':') {
+      // The handler parses the specs and must stop at the closing '}'.
+      begin = handler.on_format_specs(adapter.arg_id, begin + 1, end);
+      if (begin == end || *begin != '}')
+        return handler.on_error("unknown format specifier"), end;
+    } else {
+      return handler.on_error("missing '}' in format string"), end;
+    }
+  }
+  return begin + 1;
+}
+
+// Splits a format string into literal text and replacement fields, reporting
+// each piece to `handler` (on_text, on_arg_id, on_replacement_field,
+// on_format_specs, on_error).
+template
+FMT_CONSTEXPR FMT_INLINE void parse_format_string(
+    basic_string_view format_str, Handler&& handler) {
+  // Workaround a name-lookup bug in MSVC's modules implementation.
+  using detail::find;
+
+  auto begin = format_str.data();
+  auto end = begin + format_str.size();
+  if (end - begin < 32) {
+    // Use a simple loop instead of memchr for small strings.
+    const Char* p = begin;
+    while (p != end) {
+      auto c = *p++;
+      if (c == '{') {
+        // Flush the text before '{', then parse the field starting at it.
+        handler.on_text(begin, p - 1);
+        begin = p = parse_replacement_field(p - 1, end, handler);
+      } else if (c == '}') {
+        // A '}' in text must be doubled; emit the text up to and including
+        // the first '}' and skip the second.
+        if (p == end || *p != '}')
+          return handler.on_error("unmatched '}' in format string");
+        handler.on_text(begin, p);
+        begin = ++p;
+      }
+    }
+    handler.on_text(begin, end);
+    return;
+  }
+  // Emits the literal text [from, to), collapsing each "}}" into one '}' and
+  // reporting a lone '}' as an error.
+  struct writer {
+    FMT_CONSTEXPR void operator()(const Char* from, const Char* to) {
+      if (from == to) return;
+      for (;;) {
+        const Char* p = nullptr;
+        if (!find(from, to, Char('}'), p))
+          return handler_.on_text(from, to);
+        ++p;
+        if (p == to || *p != '}')
+          return handler_.on_error("unmatched '}' in format string");
+        handler_.on_text(from, p);
+        from = p + 1;
+      }
+    }
+    Handler& handler_;
+  } write = {handler};
+  while (begin != end) {
+    // Doing two passes with memchr (one for '{' and another for '}') is up to
+    // 2.5x faster than the naive one-pass implementation on big format strings.
+    const Char* p = begin;
+    if (*begin != '{' && !find(begin + 1, end, Char('{'), p))
+      return write(begin, end);
+    write(begin, p);
+    begin = parse_replacement_field(p, end, handler);
+  }
+}
+
+// Yields T unchanged in the primary template; the specialization below
+// yields remove_cvref_t of a type instead.
+template ::value> struct strip_named_arg {
+  using type = T;
+};
+template struct strip_named_arg {
+  using type = remove_cvref_t;
+};
+
+// Default-constructs the formatter (or fallback formatter) selected for the
+// argument type and returns the result of its parse(ctx).
+template
+FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx)
+    -> decltype(ctx.begin()) {
+  using char_type = typename ParseContext::char_type;
+  using context = buffer_context;
+  using stripped_type = typename strip_named_arg::type;
+  // Use the type produced by arg_mapper unless the mapped type constant is
+  // custom_type, in which case the stripped type itself is used.
+  using mapped_type = conditional_t<
+      mapped_type_constant::value != type::custom_type,
+      decltype(arg_mapper().map(std::declval())),
+      stripped_type>;
+  auto f = conditional_t::value,
+                         formatter,
+                         fallback_formatter>();
+  return f.parse(ctx);
+}
+
+// Reports an error unless `type` is none, one of the presentation types up to
+// bin_upper, or chr. Relies on the enumerator order of presentation_type.
+template
+FMT_CONSTEXPR void check_int_type_spec(presentation_type type,
+                                       ErrorHandler&& eh) {
+  if (type > presentation_type::bin_upper && type != presentation_type::chr)
+    eh.on_error("invalid type specifier");
+}
+
+// Checks char specs and returns true if the type spec is char (and not int).
+template
+FMT_CONSTEXPR auto check_char_specs(const basic_format_specs& specs,
+                                    ErrorHandler&& eh = {}) -> bool {
+  if (specs.type != presentation_type::none &&
+      specs.type != presentation_type::chr &&
+      specs.type != presentation_type::debug) {
+    // Any other type is validated as an integer presentation.
+    check_int_type_spec(specs.type, eh);
+    return false;
+  }
+  // Numeric alignment, a sign and '#' are rejected for char presentation.
+  if (specs.align == align::numeric || specs.sign != sign::none || specs.alt)
+    eh.on_error("invalid format specifier for char");
+  return true;
+}
+
+// A floating-point presentation format.
+enum class float_format : unsigned char {
+  general,  // General: exponent notation or fixed point based on magnitude.
+  exp,      // Exponent notation with the default precision of 6, e.g. 1.2e-3.
+  fixed,    // Fixed point with the default precision of 6, e.g. 0.0012.
+  hex
+};
+
+// Floating-point formatting options derived from basic_format_specs by
+// parse_float_type_spec, which fills format, upper, locale and showpoint.
+struct float_specs {
+  int precision;
+  float_format format : 8;
+  sign_t sign : 8;
+  bool upper : 1;
+  bool locale : 1;
+  bool binary32 : 1;
+  bool showpoint : 1;
+};
+
+// Translates the presentation type of `specs` into float_specs. Types that
+// are not floating-point presentations are reported through `eh`.
+template
+FMT_CONSTEXPR auto parse_float_type_spec(const basic_format_specs& specs,
+                                         ErrorHandler&& eh = {})
+    -> float_specs {
+  auto result = float_specs();
+  result.showpoint = specs.alt;
+  result.locale = specs.localized;
+  // Each *_upper case sets `upper` and falls through to its lowercase twin.
+  switch (specs.type) {
+  case presentation_type::none:
+    result.format = float_format::general;
+    break;
+  case presentation_type::general_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::general_lower:
+    result.format = float_format::general;
+    break;
+  case presentation_type::exp_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::exp_lower:
+    result.format = float_format::exp;
+    // For exp, showpoint is also set whenever the precision is not 0.
+    result.showpoint |= specs.precision != 0;
+    break;
+  case presentation_type::fixed_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::fixed_lower:
+    result.format = float_format::fixed;
+    // Same rule as exp: showpoint is set whenever the precision is not 0.
+    result.showpoint |= specs.precision != 0;
+    break;
+  case presentation_type::hexfloat_upper:
+    result.upper = true;
+    FMT_FALLTHROUGH;
+  case presentation_type::hexfloat_lower:
+    result.format = float_format::hex;
+    break;
+  default:
+    eh.on_error("invalid type specifier");
+    break;
+  }
+  return result;
+}
+
+// Returns true for none/string/debug and false for pointer; any other type
+// is reported as an error and also returns false.
+template
+FMT_CONSTEXPR auto check_cstring_type_spec(presentation_type type,
+                                           ErrorHandler&& eh = {}) -> bool {
+  if (type == presentation_type::none || type == presentation_type::string ||
+      type == presentation_type::debug)
+    return true;
+  if (type != presentation_type::pointer) eh.on_error("invalid type specifier");
+  return false;
+}
+
+// Reports an error unless `type` is none, string or debug.
+template
+FMT_CONSTEXPR void check_string_type_spec(presentation_type type,
+                                          ErrorHandler&& eh = {}) {
+  if (type != presentation_type::none && type != presentation_type::string &&
+      type != presentation_type::debug)
+    eh.on_error("invalid type specifier");
+}
+
+// Reports an error unless `type` is none or pointer.
+template
+FMT_CONSTEXPR void check_pointer_type_spec(presentation_type type,
+                                           ErrorHandler&& eh) {
+  if (type != presentation_type::none && type != presentation_type::pointer)
+    eh.on_error("invalid type specifier");
+}
+
+// A parse_format_specs handler that checks if specifiers are consistent with
+// the argument type.
+template class specs_checker : public Handler { + private: + detail::type arg_type_; + + FMT_CONSTEXPR void require_numeric_argument() { + if (!is_arithmetic_type(arg_type_)) + this->on_error("format specifier requires numeric argument"); + } + + public: + FMT_CONSTEXPR specs_checker(const Handler& handler, detail::type arg_type) + : Handler(handler), arg_type_(arg_type) {} + + FMT_CONSTEXPR void on_align(align_t align) { + if (align == align::numeric) require_numeric_argument(); + Handler::on_align(align); + } + + FMT_CONSTEXPR void on_sign(sign_t s) { + require_numeric_argument(); + if (is_integral_type(arg_type_) && arg_type_ != type::int_type && + arg_type_ != type::long_long_type && arg_type_ != type::int128_type && + arg_type_ != type::char_type) { + this->on_error("format specifier requires signed argument"); + } + Handler::on_sign(s); + } + + FMT_CONSTEXPR void on_hash() { + require_numeric_argument(); + Handler::on_hash(); + } + + FMT_CONSTEXPR void on_localized() { + require_numeric_argument(); + Handler::on_localized(); + } + + FMT_CONSTEXPR void on_zero() { + require_numeric_argument(); + Handler::on_zero(); + } + + FMT_CONSTEXPR void end_precision() { + if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) + this->on_error("precision not allowed for this argument type"); + } +}; + +constexpr int invalid_arg_index = -1; + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template +constexpr auto get_arg_index_by_name(basic_string_view name) -> int { + if constexpr (detail::is_statically_named_arg()) { + if (name == T::name) return N; + } + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name(name); + (void)name; // Workaround an MSVC bug about "unused" parameter. 
+ return invalid_arg_index; +} +#endif + +template +FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name<0, Args...>(name); +#endif + (void)name; + return invalid_arg_index; +} + +template +class format_string_checker { + private: + // In the future basic_format_parse_context will replace compile_parse_context + // here and will use is_constant_evaluated and downcasting to access the data + // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. + using parse_context_type = compile_parse_context; + static constexpr int num_args = sizeof...(Args); + + // Format specifier parsing function. + using parse_func = const Char* (*)(parse_context_type&); + + parse_context_type context_; + parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; + type types_[num_args > 0 ? static_cast(num_args) : 1]; + + public: + explicit FMT_CONSTEXPR format_string_checker( + basic_string_view format_str, ErrorHandler eh) + : context_(format_str, num_args, types_, eh), + parse_funcs_{&parse_format_specs...}, + types_{ + mapped_type_constant>::value...} { + } + + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + + FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } + FMT_CONSTEXPR auto on_arg_id(int id) -> int { + return context_.check_arg_id(id), id; + } + FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + auto index = get_arg_index_by_name(id); + if (index == invalid_arg_index) on_error("named argument is not found"); + return context_.check_arg_id(index), index; +#else + (void)id; + on_error("compile-time checks for named arguments require C++20 support"); + return 0; +#endif + } + + FMT_CONSTEXPR void on_replacement_field(int, const Char*) {} + + FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) + -> const Char* { + 
context_.advance_to(context_.begin() + (begin - &*context_.begin())); + // id >= 0 check is a workaround for gcc 10 bug (#2065). + return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; + } + + FMT_CONSTEXPR void on_error(const char* message) { + context_.on_error(message); + } +}; + +// Reports a compile-time error if S is not a valid format string. +template ::value)> +FMT_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template ::value)> +void check_format_string(S format_str) { + FMT_CONSTEXPR auto s = basic_string_view(format_str); + using checker = format_string_checker...>; + FMT_CONSTEXPR bool invalid_format = + (parse_format_string(s, checker(s, {})), true); + ignore_unused(invalid_format); +} + +// Don't use type_identity for args to simplify symbols. +template +void vformat_to(buffer& buf, basic_string_view fmt, + basic_format_args args, + locale_ref loc = {}); + +FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); +#ifndef _WIN32 +inline void vprint_mojibake(std::FILE*, string_view, format_args) {} +#endif +FMT_END_DETAIL_NAMESPACE + +// A formatter specialization for the core types corresponding to detail::type +// constants. +template +struct formatter::value != + detail::type::custom_type>> { + private: + detail::dynamic_format_specs specs_; + + public: + // Parses format specifiers stopping either at the end of the range or at the + // terminating '}'. 
+ template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { + auto begin = ctx.begin(), end = ctx.end(); + if (begin == end) return begin; + using handler_type = detail::dynamic_specs_handler; + auto type = detail::type_constant::value; + auto checker = + detail::specs_checker(handler_type(specs_, ctx), type); + auto it = detail::parse_format_specs(begin, end, checker); + auto eh = ctx.error_handler(); + switch (type) { + case detail::type::none_type: + FMT_ASSERT(false, "invalid argument type"); + break; + case detail::type::bool_type: + if (specs_.type == presentation_type::none || + specs_.type == presentation_type::string) { + break; + } + FMT_FALLTHROUGH; + case detail::type::int_type: + case detail::type::uint_type: + case detail::type::long_long_type: + case detail::type::ulong_long_type: + case detail::type::int128_type: + case detail::type::uint128_type: + detail::check_int_type_spec(specs_.type, eh); + break; + case detail::type::char_type: + detail::check_char_specs(specs_, eh); + break; + case detail::type::float_type: + if (detail::const_check(FMT_USE_FLOAT)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "float support disabled"); + break; + case detail::type::double_type: + if (detail::const_check(FMT_USE_DOUBLE)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "double support disabled"); + break; + case detail::type::long_double_type: + if (detail::const_check(FMT_USE_LONG_DOUBLE)) + detail::parse_float_type_spec(specs_, eh); + else + FMT_ASSERT(false, "long double support disabled"); + break; + case detail::type::cstring_type: + detail::check_cstring_type_spec(specs_.type, eh); + break; + case detail::type::string_type: + detail::check_string_type_spec(specs_.type, eh); + break; + case detail::type::pointer_type: + detail::check_pointer_type_spec(specs_.type, eh); + break; + case detail::type::custom_type: + // Custom format specifiers are checked in parse functions of + // 
formatter specializations. + break; + } + return it; + } + + template ::value, + enable_if_t<(U == detail::type::string_type || + U == detail::type::cstring_type || + U == detail::type::char_type), + int> = 0> + FMT_CONSTEXPR void set_debug_format() { + specs_.type = presentation_type::debug; + } + + template + FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const + -> decltype(ctx.out()); +}; + +#define FMT_FORMAT_AS(Type, Base) \ + template \ + struct formatter : formatter { \ + template \ + auto format(Type const& val, FormatContext& ctx) const \ + -> decltype(ctx.out()) { \ + return formatter::format(static_cast(val), ctx); \ + } \ + } + +FMT_FORMAT_AS(signed char, int); +FMT_FORMAT_AS(unsigned char, unsigned); +FMT_FORMAT_AS(short, int); +FMT_FORMAT_AS(unsigned short, unsigned); +FMT_FORMAT_AS(long, long long); +FMT_FORMAT_AS(unsigned long, unsigned long long); +FMT_FORMAT_AS(Char*, const Char*); +FMT_FORMAT_AS(std::basic_string, basic_string_view); +FMT_FORMAT_AS(std::nullptr_t, const void*); +FMT_FORMAT_AS(detail::std_string_view, basic_string_view); + +template struct basic_runtime { basic_string_view str; }; + +/** A compile-time format string. 
*/ +template class basic_format_string { + private: + basic_string_view str_; + + public: + template >::value)> + FMT_CONSTEVAL FMT_INLINE basic_format_string(const S& s) : str_(s) { + static_assert( + detail::count< + (std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); +#ifdef FMT_HAS_CONSTEVAL + if constexpr (detail::count_named_args() == + detail::count_statically_named_args()) { + using checker = detail::format_string_checker...>; + detail::parse_format_string(str_, checker(s, {})); + } +#else + detail::check_format_string(s); +#endif + } + basic_format_string(basic_runtime r) : str_(r.str) {} + + FMT_INLINE operator basic_string_view() const { return str_; } +}; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +// Workaround broken conversion on older gcc. +template using format_string = string_view; +inline auto runtime(string_view s) -> string_view { return s; } +#else +template +using format_string = basic_format_string...>; +/** + \rst + Creates a runtime format string. + + **Example**:: + + // Check format string at runtime instead of compile-time. + fmt::print(fmt::runtime("{:d}"), "I am not a number"); + \endrst + */ +inline auto runtime(string_view s) -> basic_runtime { return {{s}}; } +#endif + +FMT_API auto vformat(string_view fmt, format_args args) -> std::string; + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and returns the result + as a string. + + **Example**:: + + #include + std::string message = fmt::format("The answer is {}.", 42); + \endrst +*/ +template +FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) + -> std::string { + return vformat(fmt, fmt::make_format_args(args...)); +} + +/** Formats a string and writes the output to ``out``. 
*/ +template ::value)> +auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, fmt, args, {}); + return detail::get_iterator(buf, out); +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt``, writes the result to + the output iterator ``out`` and returns the iterator past the end of the output + range. `format_to` does not append a terminating null character. + + **Example**:: + + auto out = std::vector(); + fmt::format_to(std::back_inserter(out), "{}", 42); + \endrst + */ +template ::value)> +FMT_INLINE auto format_to(OutputIt out, format_string fmt, T&&... args) + -> OutputIt { + return vformat_to(out, fmt, fmt::make_format_args(args...)); +} + +template struct format_to_n_result { + /** Iterator past the end of the output range. */ + OutputIt out; + /** Total (not truncated) output size. */ + size_t size; +}; + +template ::value)> +auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + detail::vformat_to(buf, fmt, args, {}); + return {buf.out(), buf.count()}; +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt``, writes up to ``n`` + characters of the result to the output iterator ``out`` and returns the total + (not truncated) output size and the iterator past the end of the output range. + `format_to_n` does not append a terminating null character. + \endrst + */ +template ::value)> +FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, + T&&... args) -> format_to_n_result { + return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); +} + +/** Returns the number of chars in the output of ``format(fmt, args...)``. */ +template +FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, + T&&... 
args) -> size_t { + auto buf = detail::counting_buffer<>(); + detail::vformat_to(buf, string_view(fmt), + format_args(fmt::make_format_args(args...)), {}); + return buf.count(); +} + +FMT_API void vprint(string_view fmt, format_args args); +FMT_API void vprint(std::FILE* f, string_view fmt, format_args args); + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and writes the output + to ``stdout``. + + **Example**:: + + fmt::print("Elapsed time: {0:.2f} seconds", 1.23); + \endrst + */ +template +FMT_INLINE void print(format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? vprint(fmt, vargs) + : detail::vprint_mojibake(stdout, fmt, vargs); +} + +/** + \rst + Formats ``args`` according to specifications in ``fmt`` and writes the + output to the file ``f``. + + **Example**:: + + fmt::print(stderr, "Don't {}!", "panic"); + \endrst + */ +template +FMT_INLINE void print(std::FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::is_utf8() ? vprint(f, fmt, vargs) + : detail::vprint_mojibake(f, fmt, vargs); +} + +FMT_MODULE_EXPORT_END +FMT_GCC_PRAGMA("GCC pop_options") +FMT_END_NAMESPACE + +#ifdef FMT_HEADER_ONLY +# include "format.h" +#endif +#endif // FMT_CORE_H_ diff --git a/libkram/fmt/fmt.cpp b/libkram/fmt/fmt.cpp new file mode 100644 index 00000000..971d46da --- /dev/null +++ b/libkram/fmt/fmt.cpp @@ -0,0 +1,100 @@ +module; +#ifndef __cpp_modules +# error Module not supported. 
+#endif + +// put all implementation-provided headers into the global module fragment +// to prevent attachment to this module +#if !defined(_CRT_SECURE_NO_WARNINGS) && defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif +#if !defined(WIN32_LEAN_AND_MEAN) && defined(_WIN32) +# define WIN32_LEAN_AND_MEAN +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if _MSC_VER +# include +#endif +#if defined __APPLE__ || defined(__FreeBSD__) +# include +#endif +#if __has_include() +# include +#endif +#if (__has_include() || defined(__APPLE__) || \ + defined(__linux__)) && \ + (!defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP)) +# include +# include +# include +# ifndef _WIN32 +# include +# else +# include +# endif +#endif +#ifdef _WIN32 +# include +#endif + +export module fmt; + +#define FMT_MODULE_EXPORT export +#define FMT_MODULE_EXPORT_BEGIN export { +#define FMT_MODULE_EXPORT_END } +#define FMT_BEGIN_DETAIL_NAMESPACE \ + } \ + namespace detail { +#define FMT_END_DETAIL_NAMESPACE \ + } \ + export { +// all library-provided declarations and definitions +// must be in the module purview to be exported +#include "args.h" +#include "chrono.h" +#include "color.h" +#include "compile.h" +#include "format.h" +#include "os.h" +#include "printf.h" +#include "xchar.h" + +// gcc doesn't yet implement private module fragments +#if !FMT_GCC_VERSION +module : private; +#endif + +// These are already included in project +//#include "format.cpp" +//#include "os.cpp" diff --git a/libkram/fmt/format-inl.h b/libkram/fmt/format-inl.h new file mode 100644 index 00000000..2d3a4d61 --- /dev/null +++ b/libkram/fmt/format-inl.h @@ -0,0 +1,1754 @@ +// Formatting library for C++ - implementation +// +// Copyright 
(c) 2012 - 2016, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_FORMAT_INL_H_ +#define FMT_FORMAT_INL_H_ + +#include +#include +#include // errno +#include +#include +#include +#include // std::memmove +#include +#include + +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +# include +#endif + +#ifdef _WIN32 +# include // _isatty +#endif + +#include "format.h" + +FMT_BEGIN_NAMESPACE +namespace detail { + +FMT_FUNC void assert_fail(const char* file, int line, const char* message) { + // Use unchecked std::fprintf to avoid triggering another assertion when + // writing to stderr fails + std::fprintf(stderr, "%s:%d: assertion failed: %s", file, line, message); + // Chosen instead of std::abort to satisfy Clang in CUDA mode during device + // code pass. + std::terminate(); +} + +FMT_FUNC void throw_format_error(const char* message) { + FMT_THROW(format_error(message)); +} + +FMT_FUNC void format_error_code(detail::buffer& out, int error_code, + string_view message) noexcept { + // Report error code making sure that the output fits into + // inline_buffer_size to avoid dynamic memory allocation and potential + // bad_alloc. + out.try_resize(0); + static const char SEP[] = ": "; + static const char ERROR_STR[] = "error "; + // Subtract 2 to account for terminating null characters in SEP and ERROR_STR. 
+ size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2; + auto abs_value = static_cast>(error_code); + if (detail::is_negative(error_code)) { + abs_value = 0 - abs_value; + ++error_code_size; + } + error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); + auto it = buffer_appender(out); + if (message.size() <= inline_buffer_size - error_code_size) + format_to(it, FMT_STRING("{}{}"), message, SEP); + format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + FMT_ASSERT(out.size() <= inline_buffer_size, ""); +} + +FMT_FUNC void report_error(format_func func, int error_code, + const char* message) noexcept { + memory_buffer full_message; + func(full_message, error_code, message); + // Don't use fwrite_fully because the latter may throw. + if (std::fwrite(full_message.data(), full_message.size(), 1, stderr) > 0) + std::fputc('\n', stderr); +} + +// A wrapper around fwrite that throws on error. +inline void fwrite_fully(const void* ptr, size_t size, size_t count, + FILE* stream) { + size_t written = std::fwrite(ptr, size, count, stream); + if (written < count) + FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); +} + +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +template +locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { + static_assert(std::is_same::value, ""); +} + +template Locale locale_ref::get() const { + static_assert(std::is_same::value, ""); + return locale_ ? *static_cast(locale_) : std::locale(); +} + +template +FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { + auto& facet = std::use_facet>(loc.get()); + auto grouping = facet.grouping(); + auto thousands_sep = grouping.empty() ? 
Char() : facet.thousands_sep(); + return {std::move(grouping), thousands_sep}; +} +template FMT_FUNC Char decimal_point_impl(locale_ref loc) { + return std::use_facet>(loc.get()) + .decimal_point(); +} +#else +template +FMT_FUNC auto thousands_sep_impl(locale_ref) -> thousands_sep_result { + return {"\03", FMT_STATIC_THOUSANDS_SEPARATOR}; +} +template FMT_FUNC Char decimal_point_impl(locale_ref) { + return '.'; +} +#endif + +FMT_FUNC auto write_loc(appender out, loc_value value, + const format_specs& specs, locale_ref loc) -> bool { +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR + auto locale = loc.get(); + // We cannot use the num_put facet because it may produce output in + // a wrong encoding. + using facet = format_facet; + if (std::has_facet(locale)) + return std::use_facet(locale).put(out, value, specs); + return facet(locale).put(out, value, specs); +#endif + return false; +} +} // namespace detail + +template typename Locale::id format_facet::id; + +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +template format_facet::format_facet(Locale& loc) { + auto& numpunct = std::use_facet>(loc); + grouping_ = numpunct.grouping(); + if (!grouping_.empty()) separator_ = std::string(1, numpunct.thousands_sep()); +} + +template <> +FMT_API FMT_FUNC auto format_facet::do_put( + appender out, loc_value val, const format_specs& specs) const -> bool { + return val.visit( + detail::loc_writer<>{out, specs, separator_, grouping_, decimal_point_}); +} +#endif + +#if !FMT_MSC_VERSION +FMT_API FMT_FUNC format_error::~format_error() noexcept = default; +#endif + +FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, + format_args args) { + auto ec = std::error_code(error_code, std::generic_category()); + return std::system_error(ec, vformat(format_str, args)); +} + +namespace detail { + +template inline bool operator==(basic_fp x, basic_fp y) { + return x.f == y.f && x.e == y.e; +} + +// Compilers should be able to optimize this into the ror instruction. 
+FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { + r &= 31; + return (n >> r) | (n << (32 - r)); +} +FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { + r &= 63; + return (n >> r) | (n << (64 - r)); +} + +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return {static_cast(p >> 64), static_cast(p)}; +#elif defined(_MSC_VER) && defined(_M_X64) + auto result = uint128_fallback(); + result.lo_ = _umul128(x, y, &result.hi_); + return result; +#else + const uint64_t mask = static_cast(max_value()); + + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + +// Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. +namespace dragonbox { +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return static_cast(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +inline uint128_fallback umul192_upper128(uint64_t x, + uint128_fallback y) noexcept { + uint128_fallback r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; +} + +// Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. 
+inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { + return umul128_upper64(static_cast(x) << 32, y); +} + +// Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +inline uint128_fallback umul192_lower128(uint64_t x, + uint128_fallback y) noexcept { + uint64_t high = x * y.high(); + uint128_fallback high_low = umul128(x, y.low()); + return {high + high_low.high(), high_low.low()}; +} + +// Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a +// 64-bit unsigned integer. +inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { + return x * y; +} + +// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from +// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. +inline int floor_log10_pow2(int e) noexcept { + FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); + static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); + return (e * 315653) >> 20; +} + +// Various fast log computations. +inline int floor_log2_pow10(int e) noexcept { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + return (e * 1741647) >> 19; +} +inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { + FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); + return (e * 631305 - 261663) >> 21; +} + +static constexpr struct { + uint32_t divisor; + int shift_amount; +} div_small_pow10_infos[] = {{10, 16}, {100, 16}}; + +// Replaces n by floor(n / pow(10, N)) returning true if and only if n is +// divisible by pow(10, N). +// Precondition: n <= pow(10, N + 1). +template +bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { + // The numbers below are chosen such that: + // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, + // 2. nm mod 2^k < m if and only if n is divisible by d, + // where m is magic_number, k is shift_amount + // and d is divisor. 
+ // + // Item 1 is a common technique of replacing division by a constant with + // multiplication, see e.g. "Division by Invariant Integers Using + // Multiplication" by Granlund and Montgomery (1994). magic_number (m) is set + // to ceil(2^k/d) for large enough k. + // The idea for item 2 originates from Schubfach. + constexpr auto info = div_small_pow10_infos[N - 1]; + FMT_ASSERT(n <= info.divisor * 10, "n is too large"); + constexpr uint32_t magic_number = + (1u << info.shift_amount) / info.divisor + 1; + n *= magic_number; + const uint32_t comparison_mask = (1u << info.shift_amount) - 1; + bool result = (n & comparison_mask) < magic_number; + n >>= info.shift_amount; + return result; +} + +// Computes floor(n / pow(10, N)) for small n and N. +// Precondition: n <= pow(10, N + 1). +template uint32_t small_division_by_pow10(uint32_t n) noexcept { + constexpr auto info = div_small_pow10_infos[N - 1]; + FMT_ASSERT(n <= info.divisor * 10, "n is too large"); + constexpr uint32_t magic_number = + (1u << info.shift_amount) / info.divisor + 1; + return (n * magic_number) >> info.shift_amount; +} + +// Computes floor(n / 10^(kappa + 1)) (float) +inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { + // 1374389535 = ceil(2^37/100) + return static_cast((static_cast(n) * 1374389535) >> 37); +} +// Computes floor(n / 10^(kappa + 1)) (double) +inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { + // 2361183241434822607 = ceil(2^(64+7)/1000) + return umul128_upper64(n, 2361183241434822607ull) >> 7; +} + +// Various subroutines using pow10 cache +template struct cache_accessor; + +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint64_t; + + static uint64_t get_cached_power(int k) noexcept { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + static constexpr const uint64_t pow10_significands[] = { + 0x81ceb32c4b43fcf5, 0xa2425ff75e14fc32, 
0xcad2f7f5359a3b3f, + 0xfd87b5f28300ca0e, 0x9e74d1b791e07e49, 0xc612062576589ddb, + 0xf79687aed3eec552, 0x9abe14cd44753b53, 0xc16d9a0095928a28, + 0xf1c90080baf72cb2, 0x971da05074da7bef, 0xbce5086492111aeb, + 0xec1e4a7db69561a6, 0x9392ee8e921d5d08, 0xb877aa3236a4b44a, + 0xe69594bec44de15c, 0x901d7cf73ab0acda, 0xb424dc35095cd810, + 0xe12e13424bb40e14, 0x8cbccc096f5088cc, 0xafebff0bcb24aaff, + 0xdbe6fecebdedd5bf, 0x89705f4136b4a598, 0xabcc77118461cefd, + 0xd6bf94d5e57a42bd, 0x8637bd05af6c69b6, 0xa7c5ac471b478424, + 0xd1b71758e219652c, 0x83126e978d4fdf3c, 0xa3d70a3d70a3d70b, + 0xcccccccccccccccd, 0x8000000000000000, 0xa000000000000000, + 0xc800000000000000, 0xfa00000000000000, 0x9c40000000000000, + 0xc350000000000000, 0xf424000000000000, 0x9896800000000000, + 0xbebc200000000000, 0xee6b280000000000, 0x9502f90000000000, + 0xba43b74000000000, 0xe8d4a51000000000, 0x9184e72a00000000, + 0xb5e620f480000000, 0xe35fa931a0000000, 0x8e1bc9bf04000000, + 0xb1a2bc2ec5000000, 0xde0b6b3a76400000, 0x8ac7230489e80000, + 0xad78ebc5ac620000, 0xd8d726b7177a8000, 0x878678326eac9000, + 0xa968163f0a57b400, 0xd3c21bcecceda100, 0x84595161401484a0, + 0xa56fa5b99019a5c8, 0xcecb8f27f4200f3a, 0x813f3978f8940985, + 0xa18f07d736b90be6, 0xc9f2c9cd04674edf, 0xfc6f7c4045812297, + 0x9dc5ada82b70b59e, 0xc5371912364ce306, 0xf684df56c3e01bc7, + 0x9a130b963a6c115d, 0xc097ce7bc90715b4, 0xf0bdc21abb48db21, + 0x96769950b50d88f5, 0xbc143fa4e250eb32, 0xeb194f8e1ae525fe, + 0x92efd1b8d0cf37bf, 0xb7abc627050305ae, 0xe596b7b0c643c71a, + 0x8f7e32ce7bea5c70, 0xb35dbf821ae4f38c, 0xe0352f62a19e306f}; + return pow10_significands[k - float_info::min_k]; + } + + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + static compute_mul_result compute_mul( + carrier_uint u, const cache_entry_type& cache) noexcept { + auto r = umul96_upper64(u, cache); + return {static_cast(r >> 32), + static_cast(r) == 0}; + } + + static 
uint32_t compute_delta(const cache_entry_type& cache, + int beta) noexcept { + return static_cast(cache >> (64 - 1 - beta)); + } + + static compute_mul_parity_result compute_mul_parity( + carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + FMT_ASSERT(beta >= 1, ""); + FMT_ASSERT(beta < 64, ""); + + auto r = umul96_lower64(two_f, cache); + return {((r >> (64 - beta)) & 1) != 0, + static_cast(r >> (32 - beta)) == 0}; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return static_cast( + (cache - (cache >> (num_significand_bits() + 2))) >> + (64 - num_significand_bits() - 1 - beta)); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return static_cast( + (cache + (cache >> (num_significand_bits() + 1))) >> + (64 - num_significand_bits() - 1 - beta)); + } + + static carrier_uint compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return (static_cast( + cache >> (64 - num_significand_bits() - 2 - beta)) + + 1) / + 2; + } +}; + +template <> struct cache_accessor { + using carrier_uint = float_info::carrier_uint; + using cache_entry_type = uint128_fallback; + + static uint128_fallback get_cached_power(int k) noexcept { + FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, + "k is out of range"); + + static constexpr const uint128_fallback pow10_significands[] = { +#if FMT_USE_FULL_CACHE_DRAGONBOX + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0x9faacf3df73609b1, 0x77b191618c54e9ad}, + {0xc795830d75038c1d, 0xd59df5b9ef6a2418}, + {0xf97ae3d0d2446f25, 0x4b0573286b44ad1e}, + {0x9becce62836ac577, 0x4ee367f9430aec33}, + {0xc2e801fb244576d5, 0x229c41f793cda740}, + {0xf3a20279ed56d48a, 0x6b43527578c11110}, + {0x9845418c345644d6, 0x830a13896b78aaaa}, + {0xbe5691ef416bd60c, 0x23cc986bc656d554}, + {0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa9}, + 
{0x94b3a202eb1c3f39, 0x7bf7d71432f3d6aa}, + {0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc54}, + {0xe858ad248f5c22c9, 0xd1b3400f8f9cff69}, + {0x91376c36d99995be, 0x23100809b9c21fa2}, + {0xb58547448ffffb2d, 0xabd40a0c2832a78b}, + {0xe2e69915b3fff9f9, 0x16c90c8f323f516d}, + {0x8dd01fad907ffc3b, 0xae3da7d97f6792e4}, + {0xb1442798f49ffb4a, 0x99cd11cfdf41779d}, + {0xdd95317f31c7fa1d, 0x40405643d711d584}, + {0x8a7d3eef7f1cfc52, 0x482835ea666b2573}, + {0xad1c8eab5ee43b66, 0xda3243650005eed0}, + {0xd863b256369d4a40, 0x90bed43e40076a83}, + {0x873e4f75e2224e68, 0x5a7744a6e804a292}, + {0xa90de3535aaae202, 0x711515d0a205cb37}, + {0xd3515c2831559a83, 0x0d5a5b44ca873e04}, + {0x8412d9991ed58091, 0xe858790afe9486c3}, + {0xa5178fff668ae0b6, 0x626e974dbe39a873}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0x80fa687f881c7f8e, 0x7ce66634bc9d0b9a}, + {0xa139029f6a239f72, 0x1c1fffc1ebc44e81}, + {0xc987434744ac874e, 0xa327ffb266b56221}, + {0xfbe9141915d7a922, 0x4bf1ff9f0062baa9}, + {0x9d71ac8fada6c9b5, 0x6f773fc3603db4aa}, + {0xc4ce17b399107c22, 0xcb550fb4384d21d4}, + {0xf6019da07f549b2b, 0x7e2a53a146606a49}, + {0x99c102844f94e0fb, 0x2eda7444cbfc426e}, + {0xc0314325637a1939, 0xfa911155fefb5309}, + {0xf03d93eebc589f88, 0x793555ab7eba27cb}, + {0x96267c7535b763b5, 0x4bc1558b2f3458df}, + {0xbbb01b9283253ca2, 0x9eb1aaedfb016f17}, + {0xea9c227723ee8bcb, 0x465e15a979c1cadd}, + {0x92a1958a7675175f, 0x0bfacd89ec191eca}, + {0xb749faed14125d36, 0xcef980ec671f667c}, + {0xe51c79a85916f484, 0x82b7e12780e7401b}, + {0x8f31cc0937ae58d2, 0xd1b2ecb8b0908811}, + {0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa16}, + {0xdfbdcece67006ac9, 0x67a791e093e1d49b}, + {0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e1}, + {0xaecc49914078536d, 0x58fae9f773886e19}, + {0xda7f5bf590966848, 0xaf39a475506a899f}, + {0x888f99797a5e012d, 0x6d8406c952429604}, + {0xaab37fd7d8f58178, 0xc8e5087ba6d33b84}, + {0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a65}, + {0x855c3be0a17fcd26, 0x5cf2eea09a550680}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + 
{0xd0601d8efc57b08b, 0xf13b94daf124da27}, + {0x823c12795db6ce57, 0x76c53d08d6b70859}, + {0xa2cb1717b52481ed, 0x54768c4b0c64ca6f}, + {0xcb7ddcdda26da268, 0xa9942f5dcf7dfd0a}, + {0xfe5d54150b090b02, 0xd3f93b35435d7c4d}, + {0x9efa548d26e5a6e1, 0xc47bc5014a1a6db0}, + {0xc6b8e9b0709f109a, 0x359ab6419ca1091c}, + {0xf867241c8cc6d4c0, 0xc30163d203c94b63}, + {0x9b407691d7fc44f8, 0x79e0de63425dcf1e}, + {0xc21094364dfb5636, 0x985915fc12f542e5}, + {0xf294b943e17a2bc4, 0x3e6f5b7b17b2939e}, + {0x979cf3ca6cec5b5a, 0xa705992ceecf9c43}, + {0xbd8430bd08277231, 0x50c6ff782a838354}, + {0xece53cec4a314ebd, 0xa4f8bf5635246429}, + {0x940f4613ae5ed136, 0x871b7795e136be9a}, + {0xb913179899f68584, 0x28e2557b59846e40}, + {0xe757dd7ec07426e5, 0x331aeada2fe589d0}, + {0x9096ea6f3848984f, 0x3ff0d2c85def7622}, + {0xb4bca50b065abe63, 0x0fed077a756b53aa}, + {0xe1ebce4dc7f16dfb, 0xd3e8495912c62895}, + {0x8d3360f09cf6e4bd, 0x64712dd7abbbd95d}, + {0xb080392cc4349dec, 0xbd8d794d96aacfb4}, + {0xdca04777f541c567, 0xecf0d7a0fc5583a1}, + {0x89e42caaf9491b60, 0xf41686c49db57245}, + {0xac5d37d5b79b6239, 0x311c2875c522ced6}, + {0xd77485cb25823ac7, 0x7d633293366b828c}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xa8530886b54dbdeb, 0xd9f57f830283fdfd}, + {0xd267caa862a12d66, 0xd072df63c324fd7c}, + {0x8380dea93da4bc60, 0x4247cb9e59f71e6e}, + {0xa46116538d0deb78, 0x52d9be85f074e609}, + {0xcd795be870516656, 0x67902e276c921f8c}, + {0x806bd9714632dff6, 0x00ba1cd8a3db53b7}, + {0xa086cfcd97bf97f3, 0x80e8a40eccd228a5}, + {0xc8a883c0fdaf7df0, 0x6122cd128006b2ce}, + {0xfad2a4b13d1b5d6c, 0x796b805720085f82}, + {0x9cc3a6eec6311a63, 0xcbe3303674053bb1}, + {0xc3f490aa77bd60fc, 0xbedbfc4411068a9d}, + {0xf4f1b4d515acb93b, 0xee92fb5515482d45}, + {0x991711052d8bf3c5, 0x751bdd152d4d1c4b}, + {0xbf5cd54678eef0b6, 0xd262d45a78a0635e}, + {0xef340a98172aace4, 0x86fb897116c87c35}, + {0x9580869f0e7aac0e, 0xd45d35e6ae3d4da1}, + {0xbae0a846d2195712, 0x8974836059cca10a}, + {0xe998d258869facd7, 0x2bd1a438703fc94c}, + 
{0x91ff83775423cc06, 0x7b6306a34627ddd0}, + {0xb67f6455292cbf08, 0x1a3bc84c17b1d543}, + {0xe41f3d6a7377eeca, 0x20caba5f1d9e4a94}, + {0x8e938662882af53e, 0x547eb47b7282ee9d}, + {0xb23867fb2a35b28d, 0xe99e619a4f23aa44}, + {0xdec681f9f4c31f31, 0x6405fa00e2ec94d5}, + {0x8b3c113c38f9f37e, 0xde83bc408dd3dd05}, + {0xae0b158b4738705e, 0x9624ab50b148d446}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d7}, + {0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4d}, + {0xd47487cc8470652b, 0x7647c32000696720}, + {0x84c8d4dfd2c63f3b, 0x29ecd9f40041e074}, + {0xa5fb0a17c777cf09, 0xf468107100525891}, + {0xcf79cc9db955c2cc, 0x7182148d4066eeb5}, + {0x81ac1fe293d599bf, 0xc6f14cd848405531}, + {0xa21727db38cb002f, 0xb8ada00e5a506a7d}, + {0xca9cf1d206fdc03b, 0xa6d90811f0e4851d}, + {0xfd442e4688bd304a, 0x908f4a166d1da664}, + {0x9e4a9cec15763e2e, 0x9a598e4e043287ff}, + {0xc5dd44271ad3cdba, 0x40eff1e1853f29fe}, + {0xf7549530e188c128, 0xd12bee59e68ef47d}, + {0x9a94dd3e8cf578b9, 0x82bb74f8301958cf}, + {0xc13a148e3032d6e7, 0xe36a52363c1faf02}, + {0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac2}, + {0x96f5600f15a7b7e5, 0x29ab103a5ef8c0ba}, + {0xbcb2b812db11a5de, 0x7415d448f6b6f0e8}, + {0xebdf661791d60f56, 0x111b495b3464ad22}, + {0x936b9fcebb25c995, 0xcab10dd900beec35}, + {0xb84687c269ef3bfb, 0x3d5d514f40eea743}, + {0xe65829b3046b0afa, 0x0cb4a5a3112a5113}, + {0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ac}, + {0xb3f4e093db73a093, 0x59ed216765690f57}, + {0xe0f218b8d25088b8, 0x306869c13ec3532d}, + {0x8c974f7383725573, 0x1e414218c73a13fc}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0xdbac6c247d62a583, 0xdf45f746b74abf3a}, + {0x894bc396ce5da772, 0x6b8bba8c328eb784}, + {0xab9eb47c81f5114f, 0x066ea92f3f326565}, + {0xd686619ba27255a2, 0xc80a537b0efefebe}, + {0x8613fd0145877585, 0xbd06742ce95f5f37}, + {0xa798fc4196e952e7, 0x2c48113823b73705}, + {0xd17f3b51fca3a7a0, 0xf75a15862ca504c6}, + {0x82ef85133de648c4, 0x9a984d73dbe722fc}, + {0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebbb}, + 
{0xcc963fee10b7d1b3, 0x318df905079926a9}, + {0xffbbcfe994e5c61f, 0xfdf17746497f7053}, + {0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa634}, + {0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc1}, + {0xf9bd690a1b68637b, 0x3dfdce7aa3c673b1}, + {0x9c1661a651213e2d, 0x06bea10ca65c084f}, + {0xc31bfa0fe5698db8, 0x486e494fcff30a63}, + {0xf3e2f893dec3f126, 0x5a89dba3c3efccfb}, + {0x986ddb5c6b3a76b7, 0xf89629465a75e01d}, + {0xbe89523386091465, 0xf6bbb397f1135824}, + {0xee2ba6c0678b597f, 0x746aa07ded582e2d}, + {0x94db483840b717ef, 0xa8c2a44eb4571cdd}, + {0xba121a4650e4ddeb, 0x92f34d62616ce414}, + {0xe896a0d7e51e1566, 0x77b020baf9c81d18}, + {0x915e2486ef32cd60, 0x0ace1474dc1d122f}, + {0xb5b5ada8aaff80b8, 0x0d819992132456bb}, + {0xe3231912d5bf60e6, 0x10e1fff697ed6c6a}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb3}, + {0xddd0467c64bce4a0, 0xac7cb3f6d05ddbdf}, + {0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96c}, + {0xad4ab7112eb3929d, 0x86c16c98d2c953c7}, + {0xd89d64d57a607744, 0xe871c7bf077ba8b8}, + {0x87625f056c7c4a8b, 0x11471cd764ad4973}, + {0xa93af6c6c79b5d2d, 0xd598e40d3dd89bd0}, + {0xd389b47879823479, 0x4aff1d108d4ec2c4}, + {0x843610cb4bf160cb, 0xcedf722a585139bb}, + {0xa54394fe1eedb8fe, 0xc2974eb4ee658829}, + {0xce947a3da6a9273e, 0x733d226229feea33}, + {0x811ccc668829b887, 0x0806357d5a3f5260}, + {0xa163ff802a3426a8, 0xca07c2dcb0cf26f8}, + {0xc9bcff6034c13052, 0xfc89b393dd02f0b6}, + {0xfc2c3f3841f17c67, 0xbbac2078d443ace3}, + {0x9d9ba7832936edc0, 0xd54b944b84aa4c0e}, + {0xc5029163f384a931, 0x0a9e795e65d4df12}, + {0xf64335bcf065d37d, 0x4d4617b5ff4a16d6}, + {0x99ea0196163fa42e, 0x504bced1bf8e4e46}, + {0xc06481fb9bcf8d39, 0xe45ec2862f71e1d7}, + {0xf07da27a82c37088, 0x5d767327bb4e5a4d}, + {0x964e858c91ba2655, 0x3a6a07f8d510f870}, + {0xbbe226efb628afea, 0x890489f70a55368c}, + {0xeadab0aba3b2dbe5, 0x2b45ac74ccea842f}, + {0x92c8ae6b464fc96f, 0x3b0b8bc90012929e}, + {0xb77ada0617e3bbcb, 0x09ce6ebb40173745}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + 
{0x8f57fa54c2a9eab6, 0x9fa946824a12232e}, + {0xb32df8e9f3546564, 0x47939822dc96abfa}, + {0xdff9772470297ebd, 0x59787e2b93bc56f8}, + {0x8bfbea76c619ef36, 0x57eb4edb3c55b65b}, + {0xaefae51477a06b03, 0xede622920b6b23f2}, + {0xdab99e59958885c4, 0xe95fab368e45ecee}, + {0x88b402f7fd75539b, 0x11dbcb0218ebb415}, + {0xaae103b5fcd2a881, 0xd652bdc29f26a11a}, + {0xd59944a37c0752a2, 0x4be76d3346f04960}, + {0x857fcae62d8493a5, 0x6f70a4400c562ddc}, + {0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb953}, + {0xd097ad07a71f26b2, 0x7e2000a41346a7a8}, + {0x825ecc24c873782f, 0x8ed400668c0c28c9}, + {0xa2f67f2dfa90563b, 0x728900802f0f32fb}, + {0xcbb41ef979346bca, 0x4f2b40a03ad2ffba}, + {0xfea126b7d78186bc, 0xe2f610c84987bfa9}, + {0x9f24b832e6b0f436, 0x0dd9ca7d2df4d7ca}, + {0xc6ede63fa05d3143, 0x91503d1c79720dbc}, + {0xf8a95fcf88747d94, 0x75a44c6397ce912b}, + {0x9b69dbe1b548ce7c, 0xc986afbe3ee11abb}, + {0xc24452da229b021b, 0xfbe85badce996169}, + {0xf2d56790ab41c2a2, 0xfae27299423fb9c4}, + {0x97c560ba6b0919a5, 0xdccd879fc967d41b}, + {0xbdb6b8e905cb600f, 0x5400e987bbc1c921}, + {0xed246723473e3813, 0x290123e9aab23b69}, + {0x9436c0760c86e30b, 0xf9a0b6720aaf6522}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0xe7958cb87392c2c2, 0xb60b1d1230b20e05}, + {0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c3}, + {0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af4}, + {0xe2280b6c20dd5232, 0x25c6da63c38de1b1}, + {0x8d590723948a535f, 0x579c487e5a38ad0f}, + {0xb0af48ec79ace837, 0x2d835a9df0c6d852}, + {0xdcdb1b2798182244, 0xf8e431456cf88e66}, + {0x8a08f0f8bf0f156b, 0x1b8e9ecb641b5900}, + {0xac8b2d36eed2dac5, 0xe272467e3d222f40}, + {0xd7adf884aa879177, 0x5b0ed81dcc6abb10}, + {0x86ccbb52ea94baea, 0x98e947129fc2b4ea}, + {0xa87fea27a539e9a5, 0x3f2398d747b36225}, + {0xd29fe4b18e88640e, 0x8eec7f0d19a03aae}, + {0x83a3eeeef9153e89, 0x1953cf68300424ad}, + {0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd8}, + {0xcdb02555653131b6, 0x3792f412cb06794e}, + {0x808e17555f3ebf11, 0xe2bbd88bbee40bd1}, + {0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec5}, + 
{0xc8de047564d20a8b, 0xf245825a5a445276}, + {0xfb158592be068d2e, 0xeed6e2f0f0d56713}, + {0x9ced737bb6c4183d, 0x55464dd69685606c}, + {0xc428d05aa4751e4c, 0xaa97e14c3c26b887}, + {0xf53304714d9265df, 0xd53dd99f4b3066a9}, + {0x993fe2c6d07b7fab, 0xe546a8038efe402a}, + {0xbf8fdb78849a5f96, 0xde98520472bdd034}, + {0xef73d256a5c0f77c, 0x963e66858f6d4441}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xbb127c53b17ec159, 0x5560c018580d5d53}, + {0xe9d71b689dde71af, 0xaab8f01e6e10b4a7}, + {0x9226712162ab070d, 0xcab3961304ca70e9}, + {0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d23}, + {0xe45c10c42a2b3b05, 0x8cb89a7db77c506b}, + {0x8eb98a7a9a5b04e3, 0x77f3608e92adb243}, + {0xb267ed1940f1c61c, 0x55f038b237591ed4}, + {0xdf01e85f912e37a3, 0x6b6c46dec52f6689}, + {0x8b61313bbabce2c6, 0x2323ac4b3b3da016}, + {0xae397d8aa96c1b77, 0xabec975e0a0d081b}, + {0xd9c7dced53c72255, 0x96e7bd358c904a22}, + {0x881cea14545c7575, 0x7e50d64177da2e55}, + {0xaa242499697392d2, 0xdde50bd1d5d0b9ea}, + {0xd4ad2dbfc3d07787, 0x955e4ec64b44e865}, + {0x84ec3c97da624ab4, 0xbd5af13bef0b113f}, + {0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58f}, + {0xcfb11ead453994ba, 0x67de18eda5814af3}, + {0x81ceb32c4b43fcf4, 0x80eacf948770ced8}, + {0xa2425ff75e14fc31, 0xa1258379a94d028e}, + {0xcad2f7f5359a3b3e, 0x096ee45813a04331}, + {0xfd87b5f28300ca0d, 0x8bca9d6e188853fd}, + {0x9e74d1b791e07e48, 0x775ea264cf55347e}, + {0xc612062576589dda, 0x95364afe032a819e}, + {0xf79687aed3eec551, 0x3a83ddbd83f52205}, + {0x9abe14cd44753b52, 0xc4926a9672793543}, + {0xc16d9a0095928a27, 0x75b7053c0f178294}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0x971da05074da7bee, 0xd3f6fc16ebca5e04}, + {0xbce5086492111aea, 0x88f4bb1ca6bcf585}, + {0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6}, + {0x9392ee8e921d5d07, 0x3aff322e62439fd0}, + {0xb877aa3236a4b449, 0x09befeb9fad487c3}, + {0xe69594bec44de15b, 0x4c2ebe687989a9b4}, + {0x901d7cf73ab0acd9, 0x0f9d37014bf60a11}, + {0xb424dc35095cd80f, 0x538484c19ef38c95}, + {0xe12e13424bb40e13, 0x2865a5f206b06fba}, + 
{0x8cbccc096f5088cb, 0xf93f87b7442e45d4}, + {0xafebff0bcb24aafe, 0xf78f69a51539d749}, + {0xdbe6fecebdedd5be, 0xb573440e5a884d1c}, + {0x89705f4136b4a597, 0x31680a88f8953031}, + {0xabcc77118461cefc, 0xfdc20d2b36ba7c3e}, + {0xd6bf94d5e57a42bc, 0x3d32907604691b4d}, + {0x8637bd05af6c69b5, 0xa63f9a49c2c1b110}, + {0xa7c5ac471b478423, 0x0fcf80dc33721d54}, + {0xd1b71758e219652b, 0xd3c36113404ea4a9}, + {0x83126e978d4fdf3b, 0x645a1cac083126ea}, + {0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4}, + {0xcccccccccccccccc, 0xcccccccccccccccd}, + {0x8000000000000000, 0x0000000000000000}, + {0xa000000000000000, 0x0000000000000000}, + {0xc800000000000000, 0x0000000000000000}, + {0xfa00000000000000, 0x0000000000000000}, + {0x9c40000000000000, 0x0000000000000000}, + {0xc350000000000000, 0x0000000000000000}, + {0xf424000000000000, 0x0000000000000000}, + {0x9896800000000000, 0x0000000000000000}, + {0xbebc200000000000, 0x0000000000000000}, + {0xee6b280000000000, 0x0000000000000000}, + {0x9502f90000000000, 0x0000000000000000}, + {0xba43b74000000000, 0x0000000000000000}, + {0xe8d4a51000000000, 0x0000000000000000}, + {0x9184e72a00000000, 0x0000000000000000}, + {0xb5e620f480000000, 0x0000000000000000}, + {0xe35fa931a0000000, 0x0000000000000000}, + {0x8e1bc9bf04000000, 0x0000000000000000}, + {0xb1a2bc2ec5000000, 0x0000000000000000}, + {0xde0b6b3a76400000, 0x0000000000000000}, + {0x8ac7230489e80000, 0x0000000000000000}, + {0xad78ebc5ac620000, 0x0000000000000000}, + {0xd8d726b7177a8000, 0x0000000000000000}, + {0x878678326eac9000, 0x0000000000000000}, + {0xa968163f0a57b400, 0x0000000000000000}, + {0xd3c21bcecceda100, 0x0000000000000000}, + {0x84595161401484a0, 0x0000000000000000}, + {0xa56fa5b99019a5c8, 0x0000000000000000}, + {0xcecb8f27f4200f3a, 0x0000000000000000}, + {0x813f3978f8940984, 0x4000000000000000}, + {0xa18f07d736b90be5, 0x5000000000000000}, + {0xc9f2c9cd04674ede, 0xa400000000000000}, + {0xfc6f7c4045812296, 0x4d00000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + 
{0xc5371912364ce305, 0x6c28000000000000}, + {0xf684df56c3e01bc6, 0xc732000000000000}, + {0x9a130b963a6c115c, 0x3c7f400000000000}, + {0xc097ce7bc90715b3, 0x4b9f100000000000}, + {0xf0bdc21abb48db20, 0x1e86d40000000000}, + {0x96769950b50d88f4, 0x1314448000000000}, + {0xbc143fa4e250eb31, 0x17d955a000000000}, + {0xeb194f8e1ae525fd, 0x5dcfab0800000000}, + {0x92efd1b8d0cf37be, 0x5aa1cae500000000}, + {0xb7abc627050305ad, 0xf14a3d9e40000000}, + {0xe596b7b0c643c719, 0x6d9ccd05d0000000}, + {0x8f7e32ce7bea5c6f, 0xe4820023a2000000}, + {0xb35dbf821ae4f38b, 0xdda2802c8a800000}, + {0xe0352f62a19e306e, 0xd50b2037ad200000}, + {0x8c213d9da502de45, 0x4526f422cc340000}, + {0xaf298d050e4395d6, 0x9670b12b7f410000}, + {0xdaf3f04651d47b4c, 0x3c0cdd765f114000}, + {0x88d8762bf324cd0f, 0xa5880a69fb6ac800}, + {0xab0e93b6efee0053, 0x8eea0d047a457a00}, + {0xd5d238a4abe98068, 0x72a4904598d6d880}, + {0x85a36366eb71f041, 0x47a6da2b7f864750}, + {0xa70c3c40a64e6c51, 0x999090b65f67d924}, + {0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d}, + {0x82818f1281ed449f, 0xbff8f10e7a8921a5}, + {0xa321f2d7226895c7, 0xaff72d52192b6a0e}, + {0xcbea6f8ceb02bb39, 0x9bf4f8a69f764491}, + {0xfee50b7025c36a08, 0x02f236d04753d5b5}, + {0x9f4f2726179a2245, 0x01d762422c946591}, + {0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef6}, + {0xf8ebad2b84e0d58b, 0xd2e0898765a7deb3}, + {0x9b934c3b330c8577, 0x63cc55f49f88eb30}, + {0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fc}, + {0xf316271c7fc3908a, 0x8bef464e3945ef7b}, + {0x97edd871cfda3a56, 0x97758bf0e3cbb5ad}, + {0xbde94e8e43d0c8ec, 0x3d52eeed1cbea318}, + {0xed63a231d4c4fb27, 0x4ca7aaa863ee4bde}, + {0x945e455f24fb1cf8, 0x8fe8caa93e74ef6b}, + {0xb975d6b6ee39e436, 0xb3e2fd538e122b45}, + {0xe7d34c64a9c85d44, 0x60dbbca87196b617}, + {0x90e40fbeea1d3a4a, 0xbc8955e946fe31ce}, + {0xb51d13aea4a488dd, 0x6babab6398bdbe42}, + {0xe264589a4dcdab14, 0xc696963c7eed2dd2}, + {0x8d7eb76070a08aec, 0xfc1e1de5cf543ca3}, + {0xb0de65388cc8ada8, 0x3b25a55f43294bcc}, + {0xdd15fe86affad912, 0x49ef0eb713f39ebf}, + 
{0x8a2dbf142dfcc7ab, 0x6e3569326c784338}, + {0xacb92ed9397bf996, 0x49c2c37f07965405}, + {0xd7e77a8f87daf7fb, 0xdc33745ec97be907}, + {0x86f0ac99b4e8dafd, 0x69a028bb3ded71a4}, + {0xa8acd7c0222311bc, 0xc40832ea0d68ce0d}, + {0xd2d80db02aabd62b, 0xf50a3fa490c30191}, + {0x83c7088e1aab65db, 0x792667c6da79e0fb}, + {0xa4b8cab1a1563f52, 0x577001b891185939}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87}, + {0x80b05e5ac60b6178, 0x544f8158315b05b5}, + {0xa0dc75f1778e39d6, 0x696361ae3db1c722}, + {0xc913936dd571c84c, 0x03bc3a19cd1e38ea}, + {0xfb5878494ace3a5f, 0x04ab48a04065c724}, + {0x9d174b2dcec0e47b, 0x62eb0d64283f9c77}, + {0xc45d1df942711d9a, 0x3ba5d0bd324f8395}, + {0xf5746577930d6500, 0xca8f44ec7ee3647a}, + {0x9968bf6abbe85f20, 0x7e998b13cf4e1ecc}, + {0xbfc2ef456ae276e8, 0x9e3fedd8c321a67f}, + {0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101f}, + {0x95d04aee3b80ece5, 0xbba1f1d158724a13}, + {0xbb445da9ca61281f, 0x2a8a6e45ae8edc98}, + {0xea1575143cf97226, 0xf52d09d71a3293be}, + {0x924d692ca61be758, 0x593c2626705f9c57}, + {0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836d}, + {0xe498f455c38b997a, 0x0b6dfb9c0f956448}, + {0x8edf98b59a373fec, 0x4724bd4189bd5ead}, + {0xb2977ee300c50fe7, 0x58edec91ec2cb658}, + {0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ee}, + {0x8b865b215899f46c, 0xbd79e0d20082ee75}, + {0xae67f1e9aec07187, 0xecd8590680a3aa12}, + {0xda01ee641a708de9, 0xe80e6f4820cc9496}, + {0x884134fe908658b2, 0x3109058d147fdcde}, + {0xaa51823e34a7eede, 0xbd4b46f0599fd416}, + {0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91b}, + {0x850fadc09923329e, 0x03e2cf6bc604ddb1}, + {0xa6539930bf6bff45, 0x84db8346b786151d}, + {0xcfe87f7cef46ff16, 0xe612641865679a64}, + {0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07f}, + {0xa26da3999aef7749, 0xe3be5e330f38f09e}, + {0xcb090c8001ab551c, 0x5cadf5bfd3072cc6}, + {0xfdcb4fa002162a63, 0x73d9732fc7c8f7f7}, + {0x9e9f11c4014dda7e, 0x2867e7fddcdd9afb}, + {0xc646d63501a1511d, 0xb281e1fd541501b9}, + {0xf7d88bc24209a565, 0x1f225a7ca91a4227}, + {0x9ae757596946075f, 0x3375788de9b06959}, + 
{0xc1a12d2fc3978937, 0x0052d6b1641c83af}, + {0xf209787bb47d6b84, 0xc0678c5dbd23a49b}, + {0x9745eb4d50ce6332, 0xf840b7ba963646e1}, + {0xbd176620a501fbff, 0xb650e5a93bc3d899}, + {0xec5d3fa8ce427aff, 0xa3e51f138ab4cebf}, + {0x93ba47c980e98cdf, 0xc66f336c36b10138}, + {0xb8a8d9bbe123f017, 0xb80b0047445d4185}, + {0xe6d3102ad96cec1d, 0xa60dc059157491e6}, + {0x9043ea1ac7e41392, 0x87c89837ad68db30}, + {0xb454e4a179dd1877, 0x29babe4598c311fc}, + {0xe16a1dc9d8545e94, 0xf4296dd6fef3d67b}, + {0x8ce2529e2734bb1d, 0x1899e4a65f58660d}, + {0xb01ae745b101e9e4, 0x5ec05dcff72e7f90}, + {0xdc21a1171d42645d, 0x76707543f4fa1f74}, + {0x899504ae72497eba, 0x6a06494a791c53a9}, + {0xabfa45da0edbde69, 0x0487db9d17636893}, + {0xd6f8d7509292d603, 0x45a9d2845d3c42b7}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3}, + {0xa7f26836f282b732, 0x8e6cac7768d7141f}, + {0xd1ef0244af2364ff, 0x3207d795430cd927}, + {0x8335616aed761f1f, 0x7f44e6bd49e807b9}, + {0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a7}, + {0xcd036837130890a1, 0x36dba887c37a8c10}, + {0x802221226be55a64, 0xc2494954da2c978a}, + {0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6d}, + {0xc83553c5c8965d3d, 0x6f92829494e5acc8}, + {0xfa42a8b73abbf48c, 0xcb772339ba1f17fa}, + {0x9c69a97284b578d7, 0xff2a760414536efc}, + {0xc38413cf25e2d70d, 0xfef5138519684abb}, + {0xf46518c2ef5b8cd1, 0x7eb258665fc25d6a}, + {0x98bf2f79d5993802, 0xef2f773ffbd97a62}, + {0xbeeefb584aff8603, 0xaafb550ffacfd8fb}, + {0xeeaaba2e5dbf6784, 0x95ba2a53f983cf39}, + {0x952ab45cfa97a0b2, 0xdd945a747bf26184}, + {0xba756174393d88df, 0x94f971119aeef9e5}, + {0xe912b9d1478ceb17, 0x7a37cd5601aab85e}, + {0x91abb422ccb812ee, 0xac62e055c10ab33b}, + {0xb616a12b7fe617aa, 0x577b986b314d600a}, + {0xe39c49765fdf9d94, 0xed5a7e85fda0b80c}, + {0x8e41ade9fbebc27d, 0x14588f13be847308}, + {0xb1d219647ae6b31c, 0x596eb2d8ae258fc9}, + {0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bc}, + {0x8aec23d680043bee, 0x25de7bb9480d5855}, + {0xada72ccc20054ae9, 0xaf561aa79a10ae6b}, + {0xd910f7ff28069da4, 0x1b2ba1518094da05}, + 
{0x87aa9aff79042286, 0x90fb44d2f05d0843}, + {0xa99541bf57452b28, 0x353a1607ac744a54}, + {0xd3fa922f2d1675f2, 0x42889b8997915ce9}, + {0x847c9b5d7c2e09b7, 0x69956135febada12}, + {0xa59bc234db398c25, 0x43fab9837e699096}, + {0xcf02b2c21207ef2e, 0x94f967e45e03f4bc}, + {0x8161afb94b44f57d, 0x1d1be0eebac278f6}, + {0xa1ba1ba79e1632dc, 0x6462d92a69731733}, + {0xca28a291859bbf93, 0x7d7b8f7503cfdcff}, + {0xfcb2cb35e702af78, 0x5cda735244c3d43f}, + {0x9defbf01b061adab, 0x3a0888136afa64a8}, + {0xc56baec21c7a1916, 0x088aaa1845b8fdd1}, + {0xf6c69a72a3989f5b, 0x8aad549e57273d46}, + {0x9a3c2087a63f6399, 0x36ac54e2f678864c}, + {0xc0cb28a98fcf3c7f, 0x84576a1bb416a7de}, + {0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d6}, + {0x969eb7c47859e743, 0x9f644ae5a4b1b326}, + {0xbc4665b596706114, 0x873d5d9f0dde1fef}, + {0xeb57ff22fc0c7959, 0xa90cb506d155a7eb}, + {0x9316ff75dd87cbd8, 0x09a7f12442d588f3}, + {0xb7dcbf5354e9bece, 0x0c11ed6d538aeb30}, + {0xe5d3ef282a242e81, 0x8f1668c8a86da5fb}, + {0x8fa475791a569d10, 0xf96e017d694487bd}, + {0xb38d92d760ec4455, 0x37c981dcc395a9ad}, + {0xe070f78d3927556a, 0x85bbe253f47b1418}, + {0x8c469ab843b89562, 0x93956d7478ccec8f}, + {0xaf58416654a6babb, 0x387ac8d1970027b3}, + {0xdb2e51bfe9d0696a, 0x06997b05fcc0319f}, + {0x88fcf317f22241e2, 0x441fece3bdf81f04}, + {0xab3c2fddeeaad25a, 0xd527e81cad7626c4}, + {0xd60b3bd56a5586f1, 0x8a71e223d8d3b075}, + {0x85c7056562757456, 0xf6872d5667844e4a}, + {0xa738c6bebb12d16c, 0xb428f8ac016561dc}, + {0xd106f86e69d785c7, 0xe13336d701beba53}, + {0x82a45b450226b39c, 0xecc0024661173474}, + {0xa34d721642b06084, 0x27f002d7f95d0191}, + {0xcc20ce9bd35c78a5, 0x31ec038df7b441f5}, + {0xff290242c83396ce, 0x7e67047175a15272}, + {0x9f79a169bd203e41, 0x0f0062c6e984d387}, + {0xc75809c42c684dd1, 0x52c07b78a3e60869}, + {0xf92e0c3537826145, 0xa7709a56ccdf8a83}, + {0x9bbcc7a142b17ccb, 0x88a66076400bb692}, + {0xc2abf989935ddbfe, 0x6acff893d00ea436}, + {0xf356f7ebf83552fe, 0x0583f6b8c4124d44}, + {0x98165af37b2153de, 0xc3727a337a8b704b}, + 
{0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5d}, + {0xeda2ee1c7064130c, 0x1162def06f79df74}, + {0x9485d4d1c63e8be7, 0x8addcb5645ac2ba9}, + {0xb9a74a0637ce2ee1, 0x6d953e2bd7173693}, + {0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0438}, + {0x910ab1d4db9914a0, 0x1d9c9892400a22a3}, + {0xb54d5e4a127f59c8, 0x2503beb6d00cab4c}, + {0xe2a0b5dc971f303a, 0x2e44ae64840fd61e}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, + {0xb10d8e1456105dad, 0x7425a83e872c5f48}, + {0xdd50f1996b947518, 0xd12f124e28f7771a}, + {0x8a5296ffe33cc92f, 0x82bd6b70d99aaa70}, + {0xace73cbfdc0bfb7b, 0x636cc64d1001550c}, + {0xd8210befd30efa5a, 0x3c47f7e05401aa4f}, + {0x8714a775e3e95c78, 0x65acfaec34810a72}, + {0xa8d9d1535ce3b396, 0x7f1839a741a14d0e}, + {0xd31045a8341ca07c, 0x1ede48111209a051}, + {0x83ea2b892091e44d, 0x934aed0aab460433}, + {0xa4e4b66b68b65d60, 0xf81da84d56178540}, + {0xce1de40642e3f4b9, 0x36251260ab9d668f}, + {0x80d2ae83e9ce78f3, 0xc1d72b7c6b42601a}, + {0xa1075a24e4421730, 0xb24cf65b8612f820}, + {0xc94930ae1d529cfc, 0xdee033f26797b628}, + {0xfb9b7cd9a4a7443c, 0x169840ef017da3b2}, + {0x9d412e0806e88aa5, 0x8e1f289560ee864f}, + {0xc491798a08a2ad4e, 0xf1a6f2bab92a27e3}, + {0xf5b5d7ec8acb58a2, 0xae10af696774b1dc}, + {0x9991a6f3d6bf1765, 0xacca6da1e0a8ef2a}, + {0xbff610b0cc6edd3f, 0x17fd090a58d32af4}, + {0xeff394dcff8a948e, 0xddfc4b4cef07f5b1}, + {0x95f83d0a1fb69cd9, 0x4abdaf101564f98f}, + {0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f2}, + {0xea53df5fd18d5513, 0x84c86189216dc5ee}, + {0x92746b9be2f8552c, 0x32fd3cf5b4e49bb5}, + {0xb7118682dbb66a77, 0x3fbc8c33221dc2a2}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, + {0x8f05b1163ba6832d, 0x29cb4d87f2a7400f}, + {0xb2c71d5bca9023f8, 0x743e20e9ef511013}, + {0xdf78e4b2bd342cf6, 0x914da9246b255417}, + {0x8bab8eefb6409c1a, 0x1ad089b6c2f7548f}, + {0xae9672aba3d0c320, 0xa184ac2473b529b2}, + {0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741f}, + {0x8865899617fb1871, 0x7e2fa67c7a658893}, + {0xaa7eebfb9df9de8d, 0xddbb901b98feeab8}, + {0xd51ea6fa85785631, 0x552a74227f3ea566}, + 
{0x8533285c936b35de, 0xd53a88958f872760}, + {0xa67ff273b8460356, 0x8a892abaf368f138}, + {0xd01fef10a657842c, 0x2d2b7569b0432d86}, + {0x8213f56a67f6b29b, 0x9c3b29620e29fc74}, + {0xa298f2c501f45f42, 0x8349f3ba91b47b90}, + {0xcb3f2f7642717713, 0x241c70a936219a74}, + {0xfe0efb53d30dd4d7, 0xed238cd383aa0111}, + {0x9ec95d1463e8a506, 0xf4363804324a40ab}, + {0xc67bb4597ce2ce48, 0xb143c6053edcd0d6}, + {0xf81aa16fdc1b81da, 0xdd94b7868e94050b}, + {0x9b10a4e5e9913128, 0xca7cf2b4191c8327}, + {0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f1}, + {0xf24a01a73cf2dccf, 0xbc633b39673c8ced}, + {0x976e41088617ca01, 0xd5be0503e085d814}, + {0xbd49d14aa79dbc82, 0x4b2d8644d8a74e19}, + {0xec9c459d51852ba2, 0xddf8e7d60ed1219f}, + {0x93e1ab8252f33b45, 0xcabb90e5c942b504}, + {0xb8da1662e7b00a17, 0x3d6a751f3b936244}, + {0xe7109bfba19c0c9d, 0x0cc512670a783ad5}, + {0x906a617d450187e2, 0x27fb2b80668b24c6}, + {0xb484f9dc9641e9da, 0xb1f9f660802dedf7}, + {0xe1a63853bbd26451, 0x5e7873f8a0396974}, + {0x8d07e33455637eb2, 0xdb0b487b6423e1e9}, + {0xb049dc016abc5e5f, 0x91ce1a9a3d2cda63}, + {0xdc5c5301c56b75f7, 0x7641a140cc7810fc}, + {0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9e}, + {0xac2820d9623bf429, 0x546345fa9fbdcd45}, + {0xd732290fbacaf133, 0xa97c177947ad4096}, + {0x867f59a9d4bed6c0, 0x49ed8eabcccc485e}, + {0xa81f301449ee8c70, 0x5c68f256bfff5a75}, + {0xd226fc195c6a2f8c, 0x73832eec6fff3112}, + {0x83585d8fd9c25db7, 0xc831fd53c5ff7eac}, + {0xa42e74f3d032f525, 0xba3e7ca8b77f5e56}, + {0xcd3a1230c43fb26f, 0x28ce1bd2e55f35ec}, + {0x80444b5e7aa7cf85, 0x7980d163cf5b81b4}, + {0xa0555e361951c366, 0xd7e105bcc3326220}, + {0xc86ab5c39fa63440, 0x8dd9472bf3fefaa8}, + {0xfa856334878fc150, 0xb14f98f6f0feb952}, + {0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d4}, + {0xc3b8358109e84f07, 0x0a862f80ec4700c9}, + {0xf4a642e14c6262c8, 0xcd27bb612758c0fb}, + {0x98e7e9cccfbd7dbd, 0x8038d51cb897789d}, + {0xbf21e44003acdd2c, 0xe0470a63e6bd56c4}, + {0xeeea5d5004981478, 0x1858ccfce06cac75}, + {0x95527a5202df0ccb, 0x0f37801e0c43ebc9}, + 
{0xbaa718e68396cffd, 0xd30560258f54e6bb}, + {0xe950df20247c83fd, 0x47c6b82ef32a206a}, + {0x91d28b7416cdd27e, 0x4cdc331d57fa5442}, + {0xb6472e511c81471d, 0xe0133fe4adf8e953}, + {0xe3d8f9e563a198e5, 0x58180fddd97723a7}, + {0x8e679c2f5e44ff8f, 0x570f09eaa7ea7649}, + {0xb201833b35d63f73, 0x2cd2cc6551e513db}, + {0xde81e40a034bcf4f, 0xf8077f7ea65e58d2}, + {0x8b112e86420f6191, 0xfb04afaf27faf783}, + {0xadd57a27d29339f6, 0x79c5db9af1f9b564}, + {0xd94ad8b1c7380874, 0x18375281ae7822bd}, + {0x87cec76f1c830548, 0x8f2293910d0b15b6}, + {0xa9c2794ae3a3c69a, 0xb2eb3875504ddb23}, + {0xd433179d9c8cb841, 0x5fa60692a46151ec}, + {0x849feec281d7f328, 0xdbc7c41ba6bcd334}, + {0xa5c7ea73224deff3, 0x12b9b522906c0801}, + {0xcf39e50feae16bef, 0xd768226b34870a01}, + {0x81842f29f2cce375, 0xe6a1158300d46641}, + {0xa1e53af46f801c53, 0x60495ae3c1097fd1}, + {0xca5e89b18b602368, 0x385bb19cb14bdfc5}, + {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6}, + {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2}, + {0xc5a05277621be293, 0xc7098b7305241886}, + { 0xf70867153aa2db38, + 0xb8cbee4fc66d1ea8 } +#else + {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, + {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, + {0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481f}, + {0x86a8d39ef77164bc, 0xae5dff9c02033198}, + {0xd98ddaee19068c76, 0x3badd624dd9b0958}, + {0xafbd2350644eeacf, 0xe5d1929ef90898fb}, + {0x8df5efabc5979c8f, 0xca8d3ffa1ef463c2}, + {0xe55990879ddcaabd, 0xcc420a6a101d0516}, + {0xb94470938fa89bce, 0xf808e40e8d5b3e6a}, + {0x95a8637627989aad, 0xdde7001379a44aa9}, + {0xf1c90080baf72cb1, 0x5324c68b12dd6339}, + {0xc350000000000000, 0x0000000000000000}, + {0x9dc5ada82b70b59d, 0xf020000000000000}, + {0xfee50b7025c36a08, 0x02f236d04753d5b5}, + {0xcde6fd5e09abcf26, 0xed4c0226b55e6f87}, + {0xa6539930bf6bff45, 0x84db8346b786151d}, + {0x865b86925b9bc5c2, 0x0b8a2392ba45a9b3}, + {0xd910f7ff28069da4, 0x1b2ba1518094da05}, + {0xaf58416654a6babb, 0x387ac8d1970027b3}, + {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, + {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, + 
{0xb8da1662e7b00a17, 0x3d6a751f3b936244}, + { 0x95527a5202df0ccb, + 0x0f37801e0c43ebc9 } +#endif + }; + +#if FMT_USE_FULL_CACHE_DRAGONBOX + return pow10_significands[k - float_info::min_k]; +#else + static constexpr const uint64_t powers_of_5_64[] = { + 0x0000000000000001, 0x0000000000000005, 0x0000000000000019, + 0x000000000000007d, 0x0000000000000271, 0x0000000000000c35, + 0x0000000000003d09, 0x000000000001312d, 0x000000000005f5e1, + 0x00000000001dcd65, 0x00000000009502f9, 0x0000000002e90edd, + 0x000000000e8d4a51, 0x0000000048c27395, 0x000000016bcc41e9, + 0x000000071afd498d, 0x0000002386f26fc1, 0x000000b1a2bc2ec5, + 0x000003782dace9d9, 0x00001158e460913d, 0x000056bc75e2d631, + 0x0001b1ae4d6e2ef5, 0x000878678326eac9, 0x002a5a058fc295ed, + 0x00d3c21bcecceda1, 0x0422ca8b0a00a425, 0x14adf4b7320334b9}; + + static const int compression_ratio = 27; + + // Compute base index. + int cache_index = (k - float_info::min_k) / compression_ratio; + int kb = cache_index * compression_ratio + float_info::min_k; + int offset = k - kb; + + // Get base cache. + uint128_fallback base_cache = pow10_significands[cache_index]; + if (offset == 0) return base_cache; + + // Compute the required amount of bit-shift. + int alpha = floor_log2_pow10(kb + offset) - floor_log2_pow10(kb) - offset; + FMT_ASSERT(alpha > 0 && alpha < 64, "shifting error detected"); + + // Try to recover the real cache. 
+ uint64_t pow5 = powers_of_5_64[offset]; + uint128_fallback recovered_cache = umul128(base_cache.high(), pow5); + uint128_fallback middle_low = umul128(base_cache.low(), pow5); + + recovered_cache += middle_low.high(); + + uint64_t high_to_middle = recovered_cache.high() << (64 - alpha); + uint64_t middle_to_low = recovered_cache.low() << (64 - alpha); + + recovered_cache = + uint128_fallback{(recovered_cache.low() >> alpha) | high_to_middle, + ((middle_low.low() >> alpha) | middle_to_low)}; + FMT_ASSERT(recovered_cache.low() + 1 != 0, ""); + return {recovered_cache.high(), recovered_cache.low() + 1}; +#endif + } + + struct compute_mul_result { + carrier_uint result; + bool is_integer; + }; + struct compute_mul_parity_result { + bool parity; + bool is_integer; + }; + + static compute_mul_result compute_mul( + carrier_uint u, const cache_entry_type& cache) noexcept { + auto r = umul192_upper128(u, cache); + return {r.high(), r.low() == 0}; + } + + static uint32_t compute_delta(cache_entry_type const& cache, + int beta) noexcept { + return static_cast(cache.high() >> (64 - 1 - beta)); + } + + static compute_mul_parity_result compute_mul_parity( + carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + FMT_ASSERT(beta >= 1, ""); + FMT_ASSERT(beta < 64, ""); + + auto r = umul192_lower128(two_f, cache); + return {((r.high() >> (64 - beta)) & 1) != 0, + ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; + } + + static carrier_uint compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return (cache.high() - + (cache.high() >> (num_significand_bits() + 2))) >> + (64 - num_significand_bits() - 1 - beta); + } + + static carrier_uint compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return (cache.high() + + (cache.high() >> (num_significand_bits() + 1))) >> + (64 - num_significand_bits() - 1 - beta); + } + + static carrier_uint 
compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept { + return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + + 1) / + 2; + } +}; + +// Various integer checks +template +bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { + const int case_shorter_interval_left_endpoint_lower_threshold = 2; + const int case_shorter_interval_left_endpoint_upper_threshold = 3; + return exponent >= case_shorter_interval_left_endpoint_lower_threshold && + exponent <= case_shorter_interval_left_endpoint_upper_threshold; +} + +// Remove trailing zeros from n and return the number of zeros removed (float) +FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept { + FMT_ASSERT(n != 0, ""); + const uint32_t mod_inv_5 = 0xcccccccd; + const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 0; + while (true) { + auto q = rotr(n * mod_inv_25, 2); + if (q > max_value() / 100) break; + n = q; + s += 2; + } + auto q = rotr(n * mod_inv_5, 1); + if (q <= max_value() / 10) { + n = q; + s |= 1; + } + + return s; +} + +// Removes trailing zeros and returns the number of zeros removed (double) +FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { + FMT_ASSERT(n != 0, ""); + + // This magic number is ceil(2^90 / 10^8). + constexpr uint64_t magic_number = 12379400392853802749ull; + auto nm = umul128(n, magic_number); + + // Is n is divisible by 10^8? + if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) { + // If yes, work with the quotient. 
+ auto n32 = static_cast(nm.high() >> (90 - 64)); + + const uint32_t mod_inv_5 = 0xcccccccd; + const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 8; + while (true) { + auto q = rotr(n32 * mod_inv_25, 2); + if (q > max_value() / 100) break; + n32 = q; + s += 2; + } + auto q = rotr(n32 * mod_inv_5, 1); + if (q <= max_value() / 10) { + n32 = q; + s |= 1; + } + + n = n32; + return s; + } + + // If n is not divisible by 10^8, work with n itself. + const uint64_t mod_inv_5 = 0xcccccccccccccccd; + const uint64_t mod_inv_25 = mod_inv_5 * mod_inv_5; + + int s = 0; + while (true) { + auto q = rotr(n * mod_inv_25, 2); + if (q > max_value() / 100) break; + n = q; + s += 2; + } + auto q = rotr(n * mod_inv_5, 1); + if (q <= max_value() / 10) { + n = q; + s |= 1; + } + + return s; +} + +// The main algorithm for shorter interval case +template +FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { + decimal_fp ret_value; + // Compute k and beta + const int minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent); + const int beta = exponent + floor_log2_pow10(-minus_k); + + // Compute xi and zi + using cache_entry_type = typename cache_accessor::cache_entry_type; + const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); + + auto xi = cache_accessor::compute_left_endpoint_for_shorter_interval_case( + cache, beta); + auto zi = cache_accessor::compute_right_endpoint_for_shorter_interval_case( + cache, beta); + + // If the left endpoint is not an integer, increase it + if (!is_left_endpoint_integer_shorter_interval(exponent)) ++xi; + + // Try bigger divisor + ret_value.significand = zi / 10; + + // If succeed, remove trailing zeros if necessary and return + if (ret_value.significand * 10 >= xi) { + ret_value.exponent = minus_k + 1; + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + } + + // Otherwise, compute the round-up of y + ret_value.significand = + 
cache_accessor::compute_round_up_for_shorter_interval_case(cache, + beta); + ret_value.exponent = minus_k; + + // When tie occurs, choose one of them according to the rule + if (exponent >= float_info::shorter_interval_tie_lower_threshold && + exponent <= float_info::shorter_interval_tie_upper_threshold) { + ret_value.significand = ret_value.significand % 2 == 0 + ? ret_value.significand + : ret_value.significand - 1; + } else if (ret_value.significand < xi) { + ++ret_value.significand; + } + return ret_value; +} + +template decimal_fp to_decimal(T x) noexcept { + // Step 1: integer promotion & Schubfach multiplier calculation. + + using carrier_uint = typename float_info::carrier_uint; + using cache_entry_type = typename cache_accessor::cache_entry_type; + auto br = bit_cast(x); + + // Extract significand bits and exponent bits. + const carrier_uint significand_mask = + (static_cast(1) << num_significand_bits()) - 1; + carrier_uint significand = (br & significand_mask); + int exponent = + static_cast((br & exponent_mask()) >> num_significand_bits()); + + if (exponent != 0) { // Check if normal. + exponent -= exponent_bias() + num_significand_bits(); + + // Shorter interval case; proceed like Schubfach. + // In fact, when exponent == 1 and significand == 0, the interval is + // regular. However, it can be shown that the end-results are anyway same. + if (significand == 0) return shorter_interval_case(exponent); + + significand |= (static_cast(1) << num_significand_bits()); + } else { + // Subnormal case; the interval is always regular. + if (significand == 0) return {0, 0}; + exponent = + std::numeric_limits::min_exponent - num_significand_bits() - 1; + } + + const bool include_left_endpoint = (significand % 2 == 0); + const bool include_right_endpoint = include_left_endpoint; + + // Compute k and beta. 
+ const int minus_k = floor_log10_pow2(exponent) - float_info::kappa; + const cache_entry_type cache = cache_accessor::get_cached_power(-minus_k); + const int beta = exponent + floor_log2_pow10(-minus_k); + + // Compute zi and deltai. + // 10^kappa <= deltai < 10^(kappa + 1) + const uint32_t deltai = cache_accessor::compute_delta(cache, beta); + const carrier_uint two_fc = significand << 1; + + // For the case of binary32, the result of integer check is not correct for + // 29711844 * 2^-82 + // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18 + // and 29711844 * 2^-81 + // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17, + // and they are the unique counterexamples. However, since 29711844 is even, + // this does not cause any problem for the endpoints calculations; it can only + // cause a problem when we need to perform integer check for the center. + // Fortunately, with these inputs, that branch is never executed, so we are + // fine. + const typename cache_accessor::compute_mul_result z_mul = + cache_accessor::compute_mul((two_fc | 1) << beta, cache); + + // Step 2: Try larger divisor; remove trailing zeros if necessary. + + // Using an upper bound on zi, we might be able to optimize the division + // better than the compiler; we are computing zi / big_divisor here. + decimal_fp ret_value; + ret_value.significand = divide_by_10_to_kappa_plus_1(z_mul.result); + uint32_t r = static_cast(z_mul.result - float_info::big_divisor * + ret_value.significand); + + if (r < deltai) { + // Exclude the right endpoint if necessary. + if (r == 0 && (z_mul.is_integer & !include_right_endpoint)) { + --ret_value.significand; + r = float_info::big_divisor; + goto small_divisor_case_label; + } + } else if (r > deltai) { + goto small_divisor_case_label; + } else { + // r == deltai; compare fractional parts. 
+ const typename cache_accessor::compute_mul_parity_result x_mul = + cache_accessor::compute_mul_parity(two_fc - 1, cache, beta); + + if (!(x_mul.parity | (x_mul.is_integer & include_left_endpoint))) + goto small_divisor_case_label; + } + ret_value.exponent = minus_k + float_info::kappa + 1; + + // We may need to remove trailing zeros. + ret_value.exponent += remove_trailing_zeros(ret_value.significand); + return ret_value; + + // Step 3: Find the significand with the smaller divisor. + +small_divisor_case_label: + ret_value.significand *= 10; + ret_value.exponent = minus_k + float_info::kappa; + + uint32_t dist = r - (deltai / 2) + (float_info::small_divisor / 2); + const bool approx_y_parity = + ((dist ^ (float_info::small_divisor / 2)) & 1) != 0; + + // Is dist divisible by 10^kappa? + const bool divisible_by_small_divisor = + check_divisibility_and_divide_by_pow10::kappa>(dist); + + // Add dist / 10^kappa to the significand. + ret_value.significand += dist; + + if (!divisible_by_small_divisor) return ret_value; + + // Check z^(f) >= epsilon^(f). + // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1, + // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f). + // Since there are only 2 possibilities, we only need to care about the + // parity. Also, zi and r should have the same parity since the divisor + // is an even number. + const auto y_mul = cache_accessor::compute_mul_parity(two_fc, cache, beta); + + // If z^(f) >= epsilon^(f), we might have a tie when z^(f) == epsilon^(f), + // or equivalently, when y is an integer. + if (y_mul.parity != approx_y_parity) + --ret_value.significand; + else if (y_mul.is_integer & (ret_value.significand % 2 != 0)) + --ret_value.significand; + return ret_value; +} +} // namespace dragonbox + +#ifdef _MSC_VER +FMT_FUNC auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) 
+ -> int { + auto args = va_list(); + va_start(args, fmt); + int result = vsnprintf_s(buf, size, _TRUNCATE, fmt, args); + va_end(args); + return result; +} +#endif +} // namespace detail + +template <> struct formatter { + FMT_CONSTEXPR auto parse(format_parse_context& ctx) + -> format_parse_context::iterator { + return ctx.begin(); + } + + template + auto format(const detail::bigint& n, FormatContext& ctx) const -> + typename FormatContext::iterator { + auto out = ctx.out(); + bool first = true; + for (auto i = n.bigits_.size(); i > 0; --i) { + auto value = n.bigits_[i - 1u]; + if (first) { + out = format_to(out, FMT_STRING("{:x}"), value); + first = false; + continue; + } + out = format_to(out, FMT_STRING("{:08x}"), value); + } + if (n.exp_ > 0) + out = format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); + return out; + } +}; + +FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { + for_each_codepoint(s, [this](uint32_t cp, string_view) { + if (cp == invalid_code_point) FMT_THROW(std::runtime_error("invalid utf8")); + if (cp <= 0xFFFF) { + buffer_.push_back(static_cast(cp)); + } else { + cp -= 0x10000; + buffer_.push_back(static_cast(0xD800 + (cp >> 10))); + buffer_.push_back(static_cast(0xDC00 + (cp & 0x3FF))); + } + return true; + }); + buffer_.push_back(0); +} + +FMT_FUNC void format_system_error(detail::buffer& out, int error_code, + const char* message) noexcept { + FMT_TRY { + auto ec = std::error_code(error_code, std::generic_category()); + write(std::back_inserter(out), std::system_error(ec, message).what()); + return; + } + FMT_CATCH(...) 
{} + format_error_code(out, error_code, message); +} + +FMT_FUNC void report_system_error(int error_code, + const char* message) noexcept { + report_error(format_system_error, error_code, message); +} + +FMT_FUNC std::string vformat(string_view fmt, format_args args) { + // Don't optimize the "{}" case to keep the binary size small and because it + // can be better optimized in fmt::format anyway. + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); + return to_string(buffer); +} + +namespace detail { +#ifdef _WIN32 +using dword = conditional_t; +extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // + void*, const void*, dword, dword*, void*); + +FMT_FUNC bool write_console(std::FILE* f, string_view text) { + auto fd = _fileno(f); + if (_isatty(fd)) { + detail::utf8_to_utf16 u16(string_view(text.data(), text.size())); + auto written = detail::dword(); + if (detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), + u16.c_str(), static_cast(u16.size()), + &written, nullptr)) { + return true; + } + } + // We return false if the file descriptor was not TTY, or it was but + // SetConsoleW failed which can happen if the output has been redirected to + // NUL. In both cases when we return false, we should attempt to do regular + // write via fwrite or std::ostream::write. + return false; +} +#endif + +FMT_FUNC void print(std::FILE* f, string_view text) { +#ifdef _WIN32 + if (write_console(f, text)) return; +#endif + detail::fwrite_fully(text.data(), 1, text.size(), f); +} +} // namespace detail + +FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { + memory_buffer buffer; + detail::vformat_to(buffer, format_str, args); + detail::print(f, {buffer.data(), buffer.size()}); +} + +#ifdef _WIN32 +// Print assuming legacy (non-Unicode) encoding. 
+FMT_FUNC void detail::vprint_mojibake(std::FILE* f, string_view format_str, + format_args args) { + memory_buffer buffer; + detail::vformat_to(buffer, format_str, + basic_format_args>(args)); + fwrite_fully(buffer.data(), 1, buffer.size(), f); +} +#endif + +FMT_FUNC void vprint(string_view format_str, format_args args) { + vprint(stdout, format_str, args); +} + +namespace detail { + +struct singleton { + unsigned char upper; + unsigned char lower_count; +}; + +inline auto is_printable(uint16_t x, const singleton* singletons, + size_t singletons_size, + const unsigned char* singleton_lowers, + const unsigned char* normal, size_t normal_size) + -> bool { + auto upper = x >> 8; + auto lower_start = 0; + for (size_t i = 0; i < singletons_size; ++i) { + auto s = singletons[i]; + auto lower_end = lower_start + s.lower_count; + if (upper < s.upper) break; + if (upper == s.upper) { + for (auto j = lower_start; j < lower_end; ++j) { + if (singleton_lowers[j] == (x & 0xff)) return false; + } + } + lower_start = lower_end; + } + + auto xsigned = static_cast(x); + auto current = true; + for (size_t i = 0; i < normal_size; ++i) { + auto v = static_cast(normal[i]); + auto len = (v & 0x80) != 0 ? (v & 0x7f) << 8 | normal[++i] : v; + xsigned -= len; + if (xsigned < 0) break; + current = !current; + } + return current; +} + +// This code is generated by support/printable.py. 
+FMT_FUNC auto is_printable(uint32_t cp) -> bool { + static constexpr singleton singletons0[] = { + {0x00, 1}, {0x03, 5}, {0x05, 6}, {0x06, 3}, {0x07, 6}, {0x08, 8}, + {0x09, 17}, {0x0a, 28}, {0x0b, 25}, {0x0c, 20}, {0x0d, 16}, {0x0e, 13}, + {0x0f, 4}, {0x10, 3}, {0x12, 18}, {0x13, 9}, {0x16, 1}, {0x17, 5}, + {0x18, 2}, {0x19, 3}, {0x1a, 7}, {0x1c, 2}, {0x1d, 1}, {0x1f, 22}, + {0x20, 3}, {0x2b, 3}, {0x2c, 2}, {0x2d, 11}, {0x2e, 1}, {0x30, 3}, + {0x31, 2}, {0x32, 1}, {0xa7, 2}, {0xa9, 2}, {0xaa, 4}, {0xab, 8}, + {0xfa, 2}, {0xfb, 5}, {0xfd, 4}, {0xfe, 3}, {0xff, 9}, + }; + static constexpr unsigned char singletons0_lower[] = { + 0xad, 0x78, 0x79, 0x8b, 0x8d, 0xa2, 0x30, 0x57, 0x58, 0x8b, 0x8c, 0x90, + 0x1c, 0x1d, 0xdd, 0x0e, 0x0f, 0x4b, 0x4c, 0xfb, 0xfc, 0x2e, 0x2f, 0x3f, + 0x5c, 0x5d, 0x5f, 0xb5, 0xe2, 0x84, 0x8d, 0x8e, 0x91, 0x92, 0xa9, 0xb1, + 0xba, 0xbb, 0xc5, 0xc6, 0xc9, 0xca, 0xde, 0xe4, 0xe5, 0xff, 0x00, 0x04, + 0x11, 0x12, 0x29, 0x31, 0x34, 0x37, 0x3a, 0x3b, 0x3d, 0x49, 0x4a, 0x5d, + 0x84, 0x8e, 0x92, 0xa9, 0xb1, 0xb4, 0xba, 0xbb, 0xc6, 0xca, 0xce, 0xcf, + 0xe4, 0xe5, 0x00, 0x04, 0x0d, 0x0e, 0x11, 0x12, 0x29, 0x31, 0x34, 0x3a, + 0x3b, 0x45, 0x46, 0x49, 0x4a, 0x5e, 0x64, 0x65, 0x84, 0x91, 0x9b, 0x9d, + 0xc9, 0xce, 0xcf, 0x0d, 0x11, 0x29, 0x45, 0x49, 0x57, 0x64, 0x65, 0x8d, + 0x91, 0xa9, 0xb4, 0xba, 0xbb, 0xc5, 0xc9, 0xdf, 0xe4, 0xe5, 0xf0, 0x0d, + 0x11, 0x45, 0x49, 0x64, 0x65, 0x80, 0x84, 0xb2, 0xbc, 0xbe, 0xbf, 0xd5, + 0xd7, 0xf0, 0xf1, 0x83, 0x85, 0x8b, 0xa4, 0xa6, 0xbe, 0xbf, 0xc5, 0xc7, + 0xce, 0xcf, 0xda, 0xdb, 0x48, 0x98, 0xbd, 0xcd, 0xc6, 0xce, 0xcf, 0x49, + 0x4e, 0x4f, 0x57, 0x59, 0x5e, 0x5f, 0x89, 0x8e, 0x8f, 0xb1, 0xb6, 0xb7, + 0xbf, 0xc1, 0xc6, 0xc7, 0xd7, 0x11, 0x16, 0x17, 0x5b, 0x5c, 0xf6, 0xf7, + 0xfe, 0xff, 0x80, 0x0d, 0x6d, 0x71, 0xde, 0xdf, 0x0e, 0x0f, 0x1f, 0x6e, + 0x6f, 0x1c, 0x1d, 0x5f, 0x7d, 0x7e, 0xae, 0xaf, 0xbb, 0xbc, 0xfa, 0x16, + 0x17, 0x1e, 0x1f, 0x46, 0x47, 0x4e, 0x4f, 0x58, 0x5a, 0x5c, 0x5e, 0x7e, + 0x7f, 0xb5, 0xc5, 0xd4, 0xd5, 
0xdc, 0xf0, 0xf1, 0xf5, 0x72, 0x73, 0x8f, + 0x74, 0x75, 0x96, 0x2f, 0x5f, 0x26, 0x2e, 0x2f, 0xa7, 0xaf, 0xb7, 0xbf, + 0xc7, 0xcf, 0xd7, 0xdf, 0x9a, 0x40, 0x97, 0x98, 0x30, 0x8f, 0x1f, 0xc0, + 0xc1, 0xce, 0xff, 0x4e, 0x4f, 0x5a, 0x5b, 0x07, 0x08, 0x0f, 0x10, 0x27, + 0x2f, 0xee, 0xef, 0x6e, 0x6f, 0x37, 0x3d, 0x3f, 0x42, 0x45, 0x90, 0x91, + 0xfe, 0xff, 0x53, 0x67, 0x75, 0xc8, 0xc9, 0xd0, 0xd1, 0xd8, 0xd9, 0xe7, + 0xfe, 0xff, + }; + static constexpr singleton singletons1[] = { + {0x00, 6}, {0x01, 1}, {0x03, 1}, {0x04, 2}, {0x08, 8}, {0x09, 2}, + {0x0a, 5}, {0x0b, 2}, {0x0e, 4}, {0x10, 1}, {0x11, 2}, {0x12, 5}, + {0x13, 17}, {0x14, 1}, {0x15, 2}, {0x17, 2}, {0x19, 13}, {0x1c, 5}, + {0x1d, 8}, {0x24, 1}, {0x6a, 3}, {0x6b, 2}, {0xbc, 2}, {0xd1, 2}, + {0xd4, 12}, {0xd5, 9}, {0xd6, 2}, {0xd7, 2}, {0xda, 1}, {0xe0, 5}, + {0xe1, 2}, {0xe8, 2}, {0xee, 32}, {0xf0, 4}, {0xf8, 2}, {0xf9, 2}, + {0xfa, 2}, {0xfb, 1}, + }; + static constexpr unsigned char singletons1_lower[] = { + 0x0c, 0x27, 0x3b, 0x3e, 0x4e, 0x4f, 0x8f, 0x9e, 0x9e, 0x9f, 0x06, 0x07, + 0x09, 0x36, 0x3d, 0x3e, 0x56, 0xf3, 0xd0, 0xd1, 0x04, 0x14, 0x18, 0x36, + 0x37, 0x56, 0x57, 0x7f, 0xaa, 0xae, 0xaf, 0xbd, 0x35, 0xe0, 0x12, 0x87, + 0x89, 0x8e, 0x9e, 0x04, 0x0d, 0x0e, 0x11, 0x12, 0x29, 0x31, 0x34, 0x3a, + 0x45, 0x46, 0x49, 0x4a, 0x4e, 0x4f, 0x64, 0x65, 0x5c, 0xb6, 0xb7, 0x1b, + 0x1c, 0x07, 0x08, 0x0a, 0x0b, 0x14, 0x17, 0x36, 0x39, 0x3a, 0xa8, 0xa9, + 0xd8, 0xd9, 0x09, 0x37, 0x90, 0x91, 0xa8, 0x07, 0x0a, 0x3b, 0x3e, 0x66, + 0x69, 0x8f, 0x92, 0x6f, 0x5f, 0xee, 0xef, 0x5a, 0x62, 0x9a, 0x9b, 0x27, + 0x28, 0x55, 0x9d, 0xa0, 0xa1, 0xa3, 0xa4, 0xa7, 0xa8, 0xad, 0xba, 0xbc, + 0xc4, 0x06, 0x0b, 0x0c, 0x15, 0x1d, 0x3a, 0x3f, 0x45, 0x51, 0xa6, 0xa7, + 0xcc, 0xcd, 0xa0, 0x07, 0x19, 0x1a, 0x22, 0x25, 0x3e, 0x3f, 0xc5, 0xc6, + 0x04, 0x20, 0x23, 0x25, 0x26, 0x28, 0x33, 0x38, 0x3a, 0x48, 0x4a, 0x4c, + 0x50, 0x53, 0x55, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x63, 0x65, 0x66, + 0x6b, 0x73, 0x78, 0x7d, 0x7f, 0x8a, 0xa4, 0xaa, 0xaf, 0xb0, 
0xc0, 0xd0, + 0xae, 0xaf, 0x79, 0xcc, 0x6e, 0x6f, 0x93, + }; + static constexpr unsigned char normal0[] = { + 0x00, 0x20, 0x5f, 0x22, 0x82, 0xdf, 0x04, 0x82, 0x44, 0x08, 0x1b, 0x04, + 0x06, 0x11, 0x81, 0xac, 0x0e, 0x80, 0xab, 0x35, 0x28, 0x0b, 0x80, 0xe0, + 0x03, 0x19, 0x08, 0x01, 0x04, 0x2f, 0x04, 0x34, 0x04, 0x07, 0x03, 0x01, + 0x07, 0x06, 0x07, 0x11, 0x0a, 0x50, 0x0f, 0x12, 0x07, 0x55, 0x07, 0x03, + 0x04, 0x1c, 0x0a, 0x09, 0x03, 0x08, 0x03, 0x07, 0x03, 0x02, 0x03, 0x03, + 0x03, 0x0c, 0x04, 0x05, 0x03, 0x0b, 0x06, 0x01, 0x0e, 0x15, 0x05, 0x3a, + 0x03, 0x11, 0x07, 0x06, 0x05, 0x10, 0x07, 0x57, 0x07, 0x02, 0x07, 0x15, + 0x0d, 0x50, 0x04, 0x43, 0x03, 0x2d, 0x03, 0x01, 0x04, 0x11, 0x06, 0x0f, + 0x0c, 0x3a, 0x04, 0x1d, 0x25, 0x5f, 0x20, 0x6d, 0x04, 0x6a, 0x25, 0x80, + 0xc8, 0x05, 0x82, 0xb0, 0x03, 0x1a, 0x06, 0x82, 0xfd, 0x03, 0x59, 0x07, + 0x15, 0x0b, 0x17, 0x09, 0x14, 0x0c, 0x14, 0x0c, 0x6a, 0x06, 0x0a, 0x06, + 0x1a, 0x06, 0x59, 0x07, 0x2b, 0x05, 0x46, 0x0a, 0x2c, 0x04, 0x0c, 0x04, + 0x01, 0x03, 0x31, 0x0b, 0x2c, 0x04, 0x1a, 0x06, 0x0b, 0x03, 0x80, 0xac, + 0x06, 0x0a, 0x06, 0x21, 0x3f, 0x4c, 0x04, 0x2d, 0x03, 0x74, 0x08, 0x3c, + 0x03, 0x0f, 0x03, 0x3c, 0x07, 0x38, 0x08, 0x2b, 0x05, 0x82, 0xff, 0x11, + 0x18, 0x08, 0x2f, 0x11, 0x2d, 0x03, 0x20, 0x10, 0x21, 0x0f, 0x80, 0x8c, + 0x04, 0x82, 0x97, 0x19, 0x0b, 0x15, 0x88, 0x94, 0x05, 0x2f, 0x05, 0x3b, + 0x07, 0x02, 0x0e, 0x18, 0x09, 0x80, 0xb3, 0x2d, 0x74, 0x0c, 0x80, 0xd6, + 0x1a, 0x0c, 0x05, 0x80, 0xff, 0x05, 0x80, 0xdf, 0x0c, 0xee, 0x0d, 0x03, + 0x84, 0x8d, 0x03, 0x37, 0x09, 0x81, 0x5c, 0x14, 0x80, 0xb8, 0x08, 0x80, + 0xcb, 0x2a, 0x38, 0x03, 0x0a, 0x06, 0x38, 0x08, 0x46, 0x08, 0x0c, 0x06, + 0x74, 0x0b, 0x1e, 0x03, 0x5a, 0x04, 0x59, 0x09, 0x80, 0x83, 0x18, 0x1c, + 0x0a, 0x16, 0x09, 0x4c, 0x04, 0x80, 0x8a, 0x06, 0xab, 0xa4, 0x0c, 0x17, + 0x04, 0x31, 0xa1, 0x04, 0x81, 0xda, 0x26, 0x07, 0x0c, 0x05, 0x05, 0x80, + 0xa5, 0x11, 0x81, 0x6d, 0x10, 0x78, 0x28, 0x2a, 0x06, 0x4c, 0x04, 0x80, + 0x8d, 0x04, 0x80, 0xbe, 0x03, 0x1b, 
0x03, 0x0f, 0x0d, + }; + static constexpr unsigned char normal1[] = { + 0x5e, 0x22, 0x7b, 0x05, 0x03, 0x04, 0x2d, 0x03, 0x66, 0x03, 0x01, 0x2f, + 0x2e, 0x80, 0x82, 0x1d, 0x03, 0x31, 0x0f, 0x1c, 0x04, 0x24, 0x09, 0x1e, + 0x05, 0x2b, 0x05, 0x44, 0x04, 0x0e, 0x2a, 0x80, 0xaa, 0x06, 0x24, 0x04, + 0x24, 0x04, 0x28, 0x08, 0x34, 0x0b, 0x01, 0x80, 0x90, 0x81, 0x37, 0x09, + 0x16, 0x0a, 0x08, 0x80, 0x98, 0x39, 0x03, 0x63, 0x08, 0x09, 0x30, 0x16, + 0x05, 0x21, 0x03, 0x1b, 0x05, 0x01, 0x40, 0x38, 0x04, 0x4b, 0x05, 0x2f, + 0x04, 0x0a, 0x07, 0x09, 0x07, 0x40, 0x20, 0x27, 0x04, 0x0c, 0x09, 0x36, + 0x03, 0x3a, 0x05, 0x1a, 0x07, 0x04, 0x0c, 0x07, 0x50, 0x49, 0x37, 0x33, + 0x0d, 0x33, 0x07, 0x2e, 0x08, 0x0a, 0x81, 0x26, 0x52, 0x4e, 0x28, 0x08, + 0x2a, 0x56, 0x1c, 0x14, 0x17, 0x09, 0x4e, 0x04, 0x1e, 0x0f, 0x43, 0x0e, + 0x19, 0x07, 0x0a, 0x06, 0x48, 0x08, 0x27, 0x09, 0x75, 0x0b, 0x3f, 0x41, + 0x2a, 0x06, 0x3b, 0x05, 0x0a, 0x06, 0x51, 0x06, 0x01, 0x05, 0x10, 0x03, + 0x05, 0x80, 0x8b, 0x62, 0x1e, 0x48, 0x08, 0x0a, 0x80, 0xa6, 0x5e, 0x22, + 0x45, 0x0b, 0x0a, 0x06, 0x0d, 0x13, 0x39, 0x07, 0x0a, 0x36, 0x2c, 0x04, + 0x10, 0x80, 0xc0, 0x3c, 0x64, 0x53, 0x0c, 0x48, 0x09, 0x0a, 0x46, 0x45, + 0x1b, 0x48, 0x08, 0x53, 0x1d, 0x39, 0x81, 0x07, 0x46, 0x0a, 0x1d, 0x03, + 0x47, 0x49, 0x37, 0x03, 0x0e, 0x08, 0x0a, 0x06, 0x39, 0x07, 0x0a, 0x81, + 0x36, 0x19, 0x80, 0xb7, 0x01, 0x0f, 0x32, 0x0d, 0x83, 0x9b, 0x66, 0x75, + 0x0b, 0x80, 0xc4, 0x8a, 0xbc, 0x84, 0x2f, 0x8f, 0xd1, 0x82, 0x47, 0xa1, + 0xb9, 0x82, 0x39, 0x07, 0x2a, 0x04, 0x02, 0x60, 0x26, 0x0a, 0x46, 0x0a, + 0x28, 0x05, 0x13, 0x82, 0xb0, 0x5b, 0x65, 0x4b, 0x04, 0x39, 0x07, 0x11, + 0x40, 0x05, 0x0b, 0x02, 0x0e, 0x97, 0xf8, 0x08, 0x84, 0xd6, 0x2a, 0x09, + 0xa2, 0xf7, 0x81, 0x1f, 0x31, 0x03, 0x11, 0x04, 0x08, 0x81, 0x8c, 0x89, + 0x04, 0x6b, 0x05, 0x0d, 0x03, 0x09, 0x07, 0x10, 0x93, 0x60, 0x80, 0xf6, + 0x0a, 0x73, 0x08, 0x6e, 0x17, 0x46, 0x80, 0x9a, 0x14, 0x0c, 0x57, 0x09, + 0x19, 0x80, 0x87, 0x81, 0x47, 0x03, 0x85, 0x42, 0x0f, 0x15, 0x85, 0x50, + 
0x2b, 0x80, 0xd5, 0x2d, 0x03, 0x1a, 0x04, 0x02, 0x81, 0x70, 0x3a, 0x05, + 0x01, 0x85, 0x00, 0x80, 0xd7, 0x29, 0x4c, 0x04, 0x0a, 0x04, 0x02, 0x83, + 0x11, 0x44, 0x4c, 0x3d, 0x80, 0xc2, 0x3c, 0x06, 0x01, 0x04, 0x55, 0x05, + 0x1b, 0x34, 0x02, 0x81, 0x0e, 0x2c, 0x04, 0x64, 0x0c, 0x56, 0x0a, 0x80, + 0xae, 0x38, 0x1d, 0x0d, 0x2c, 0x04, 0x09, 0x07, 0x02, 0x0e, 0x06, 0x80, + 0x9a, 0x83, 0xd8, 0x08, 0x0d, 0x03, 0x0d, 0x03, 0x74, 0x0c, 0x59, 0x07, + 0x0c, 0x14, 0x0c, 0x04, 0x38, 0x08, 0x0a, 0x06, 0x28, 0x08, 0x22, 0x4e, + 0x81, 0x54, 0x0c, 0x15, 0x03, 0x03, 0x05, 0x07, 0x09, 0x19, 0x07, 0x07, + 0x09, 0x03, 0x0d, 0x07, 0x29, 0x80, 0xcb, 0x25, 0x0a, 0x84, 0x06, + }; + auto lower = static_cast(cp); + if (cp < 0x10000) { + return is_printable(lower, singletons0, + sizeof(singletons0) / sizeof(*singletons0), + singletons0_lower, normal0, sizeof(normal0)); + } + if (cp < 0x20000) { + return is_printable(lower, singletons1, + sizeof(singletons1) / sizeof(*singletons1), + singletons1_lower, normal1, sizeof(normal1)); + } + if (0x2a6de <= cp && cp < 0x2a700) return false; + if (0x2b735 <= cp && cp < 0x2b740) return false; + if (0x2b81e <= cp && cp < 0x2b820) return false; + if (0x2cea2 <= cp && cp < 0x2ceb0) return false; + if (0x2ebe1 <= cp && cp < 0x2f800) return false; + if (0x2fa1e <= cp && cp < 0x30000) return false; + if (0x3134b <= cp && cp < 0xe0100) return false; + if (0xe01f0 <= cp && cp < 0x110000) return false; + return cp < 0x110000; +} + +} // namespace detail + +FMT_END_NAMESPACE + +#endif // FMT_FORMAT_INL_H_ diff --git a/libkram/fmt/format.cpp b/libkram/fmt/format.cpp new file mode 100644 index 00000000..a84bea85 --- /dev/null +++ b/libkram/fmt/format.cpp @@ -0,0 +1,44 @@ +// Formatting library for C++ +// +// Copyright (c) 2012 - 2016, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. 
+ +#include "format-inl.h" + +FMT_BEGIN_NAMESPACE +namespace detail { + +template FMT_API auto dragonbox::to_decimal(float x) noexcept + -> dragonbox::decimal_fp; +template FMT_API auto dragonbox::to_decimal(double x) noexcept + -> dragonbox::decimal_fp; + +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +template FMT_API locale_ref::locale_ref(const std::locale& loc); +template FMT_API auto locale_ref::get() const -> std::locale; +#endif + +// Explicit instantiations for char. + +template FMT_API auto thousands_sep_impl(locale_ref) + -> thousands_sep_result; +template FMT_API auto decimal_point_impl(locale_ref) -> char; + +template FMT_API void buffer::append(const char*, const char*); + +template FMT_API void vformat_to(buffer&, string_view, + basic_format_args, + locale_ref); + +// Explicit instantiations for wchar_t. + +template FMT_API auto thousands_sep_impl(locale_ref) + -> thousands_sep_result; +template FMT_API auto decimal_point_impl(locale_ref) -> wchar_t; + +template FMT_API void buffer::append(const wchar_t*, const wchar_t*); + +} // namespace detail +FMT_END_NAMESPACE diff --git a/libkram/fmt/format.h b/libkram/fmt/format.h new file mode 100644 index 00000000..d6d50f59 --- /dev/null +++ b/libkram/fmt/format.h @@ -0,0 +1,4312 @@ +/* + Formatting library for C++ + + Copyright (c) 2012 - present, Victor Zverovich + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + --- Optional exception to the license --- + + As an exception, if, as a result of your compiling your source code, portions + of this Software are embedded into a machine-executable object form of such + source code, you may redistribute such embedded portions in such object form + without including the above copyright and permission notices. + */ + +#ifndef FMT_FORMAT_H_ +#define FMT_FORMAT_H_ + +#include // std::signbit +#include // uint32_t +#include // std::memcpy +#include // std::initializer_list +#include // std::numeric_limits +#include // std::uninitialized_copy + +// These use exceptions +#include // std::runtime_error +#include // std::system_error + +#ifdef __cpp_lib_bit_cast +# include // std::bitcast +#endif + +#include "core.h" + +#if FMT_GCC_VERSION +# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#else +# define FMT_GCC_VISIBILITY_HIDDEN +#endif + +#ifdef __NVCC__ +# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) +#else +# define FMT_CUDA_VERSION 0 +#endif + +#ifdef __has_builtin +# define FMT_HAS_BUILTIN(x) __has_builtin(x) +#else +# define FMT_HAS_BUILTIN(x) 0 +#endif + +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_NOINLINE __attribute__((noinline)) +#else +# define FMT_NOINLINE +#endif + +#if FMT_MSC_VERSION +# define FMT_MSC_DEFAULT = default +#else +# define FMT_MSC_DEFAULT +#endif + +#ifndef FMT_THROW +# if FMT_EXCEPTIONS +# if FMT_MSC_VERSION || defined(__NVCC__) +FMT_BEGIN_NAMESPACE +namespace detail { 
+template inline void do_throw(const Exception& x) { + // Silence unreachable code warnings in MSVC and NVCC because these + // are nearly impossible to fix in a generic code. + volatile bool b = true; + if (b) throw x; +} +} // namespace detail +FMT_END_NAMESPACE +# define FMT_THROW(x) detail::do_throw(x) +# else +# define FMT_THROW(x) throw x +# endif +# else +# define FMT_THROW(x) \ + do { \ + FMT_ASSERT(false, (x).what()); \ + } while (false) +# endif +#endif + +#if FMT_EXCEPTIONS +# define FMT_TRY try +# define FMT_CATCH(x) catch (x) +#else +# define FMT_TRY if (true) +# define FMT_CATCH(x) if (false) +#endif + +#ifndef FMT_MAYBE_UNUSED +# if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) +# define FMT_MAYBE_UNUSED [[maybe_unused]] +# else +# define FMT_MAYBE_UNUSED +# endif +#endif + +#ifndef FMT_USE_USER_DEFINED_LITERALS +// EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ + FMT_MSC_VERSION >= 1900) && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) +# define FMT_USE_USER_DEFINED_LITERALS 1 +# else +# define FMT_USE_USER_DEFINED_LITERALS 0 +# endif +#endif + +// Defining FMT_REDUCE_INT_INSTANTIATIONS to 1, will reduce the number of +// integer formatter template instantiations to just one by only using the +// largest integer type. This results in a reduction in binary size but will +// cause a decrease in integer formatting performance. +#if !defined(FMT_REDUCE_INT_INSTANTIATIONS) +# define FMT_REDUCE_INT_INSTANTIATIONS 0 +#endif + +// __builtin_clz is broken in clang with Microsoft CodeGen: +// https://github.com/fmtlib/fmt/issues/519. 
+#if !FMT_MSC_VERSION +# if FMT_HAS_BUILTIN(__builtin_clz) || FMT_GCC_VERSION || FMT_ICC_VERSION +# define FMT_BUILTIN_CLZ(n) __builtin_clz(n) +# endif +# if FMT_HAS_BUILTIN(__builtin_clzll) || FMT_GCC_VERSION || FMT_ICC_VERSION +# define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n) +# endif +#endif + +// __builtin_ctz is broken in Intel Compiler Classic on Windows: +// https://github.com/fmtlib/fmt/issues/2510. +#ifndef __ICL +# if FMT_HAS_BUILTIN(__builtin_ctz) || FMT_GCC_VERSION || FMT_ICC_VERSION || \ + defined(__NVCOMPILER) +# define FMT_BUILTIN_CTZ(n) __builtin_ctz(n) +# endif +# if FMT_HAS_BUILTIN(__builtin_ctzll) || FMT_GCC_VERSION || \ + FMT_ICC_VERSION || defined(__NVCOMPILER) +# define FMT_BUILTIN_CTZLL(n) __builtin_ctzll(n) +# endif +#endif + +#if FMT_MSC_VERSION +# include // _BitScanReverse[64], _BitScanForward[64], _umul128 +#endif + +// Some compilers masquerade as both MSVC and GCC-likes or otherwise support +// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the +// MSVC intrinsics if the clz and clzll builtins are not available. +#if FMT_MSC_VERSION && !defined(FMT_BUILTIN_CLZLL) && \ + !defined(FMT_BUILTIN_CTZLL) +FMT_BEGIN_NAMESPACE +namespace detail { +// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning. +# if !defined(__clang__) +# pragma intrinsic(_BitScanForward) +# pragma intrinsic(_BitScanReverse) +# if defined(_WIN64) +# pragma intrinsic(_BitScanForward64) +# pragma intrinsic(_BitScanReverse64) +# endif +# endif + +inline auto clz(uint32_t x) -> int { + unsigned long r = 0; + _BitScanReverse(&r, x); + FMT_ASSERT(x != 0, ""); + // Static analysis complains about using uninitialized data + // "r", but the only way that can happen is if "x" is 0, + // which the callers guarantee to not happen. 
+ FMT_MSC_WARNING(suppress : 6102) + return 31 ^ static_cast(r); +} +# define FMT_BUILTIN_CLZ(n) detail::clz(n) + +inline auto clzll(uint64_t x) -> int { + unsigned long r = 0; +# ifdef _WIN64 + _BitScanReverse64(&r, x); +# else + // Scan the high 32 bits. + if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 ^ (r + 32); + // Scan the low 32 bits. + _BitScanReverse(&r, static_cast(x)); +# endif + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. + return 63 ^ static_cast(r); +} +# define FMT_BUILTIN_CLZLL(n) detail::clzll(n) + +inline auto ctz(uint32_t x) -> int { + unsigned long r = 0; + _BitScanForward(&r, x); + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. + return static_cast(r); +} +# define FMT_BUILTIN_CTZ(n) detail::ctz(n) + +inline auto ctzll(uint64_t x) -> int { + unsigned long r = 0; + FMT_ASSERT(x != 0, ""); + FMT_MSC_WARNING(suppress : 6102) // Suppress a bogus static analysis warning. +# ifdef _WIN64 + _BitScanForward64(&r, x); +# else + // Scan the low 32 bits. + if (_BitScanForward(&r, static_cast(x))) return static_cast(r); + // Scan the high 32 bits. 
+ _BitScanForward(&r, static_cast(x >> 32)); + r += 32; +# endif + return static_cast(r); +} +# define FMT_BUILTIN_CTZLL(n) detail::ctzll(n) +} // namespace detail +FMT_END_NAMESPACE +#endif + +FMT_BEGIN_NAMESPACE +namespace detail { + +FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { + ignore_unused(condition); +#ifdef FMT_FUZZ + if (condition) throw std::runtime_error("fuzzing limit reached"); +#endif +} + +template struct string_literal { + static constexpr CharT value[sizeof...(C)] = {C...}; + constexpr operator basic_string_view() const { + return {value, sizeof...(C)}; + } +}; + +#if FMT_CPLUSPLUS < 201703L +template +constexpr CharT string_literal::value[sizeof...(C)]; +#endif + +template class formatbuf : public Streambuf { + private: + using char_type = typename Streambuf::char_type; + using streamsize = decltype(std::declval().sputn(nullptr, 0)); + using int_type = typename Streambuf::int_type; + using traits_type = typename Streambuf::traits_type; + + buffer& buffer_; + + public: + explicit formatbuf(buffer& buf) : buffer_(buf) {} + + protected: + // The put area is always empty. This makes the implementation simpler and has + // the advantage that the streambuf and the buffer are always in sync and + // sputc never writes into uninitialized memory. A disadvantage is that each + // call to sputc always results in a (virtual) call to overflow. There is no + // disadvantage here for sputn since this always results in a call to xsputn. + + auto overflow(int_type ch) -> int_type override { + if (!traits_type::eq_int_type(ch, traits_type::eof())) + buffer_.push_back(static_cast(ch)); + return ch; + } + + auto xsputn(const char_type* s, streamsize count) -> streamsize override { + buffer_.append(s, s + count); + return count; + } +}; + +// Implementation of std::bit_cast for pre-C++20. 
+template +FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { +#ifdef __cpp_lib_bit_cast + if (is_constant_evaluated()) return std::bit_cast(from); +#endif + auto to = To(); + // The cast suppresses a bogus -Wclass-memaccess on GCC. + std::memcpy(static_cast(&to), &from, sizeof(to)); + return to; +} + +inline auto is_big_endian() -> bool { +#ifdef _WIN32 + return false; +#elif defined(__BIG_ENDIAN__) + return true; +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + return __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; +#else + struct bytes { + char data[sizeof(int)]; + }; + return bit_cast(1).data[0] == 0; +#endif +} + +class uint128_fallback { + private: + uint64_t lo_, hi_; + + friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept; + + public: + constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} + constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} + + constexpr uint64_t high() const noexcept { return hi_; } + constexpr uint64_t low() const noexcept { return lo_; } + + template ::value)> + constexpr explicit operator T() const { + return static_cast(lo_); + } + + friend constexpr auto operator==(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return lhs.hi_ == rhs.hi_ && lhs.lo_ == rhs.lo_; + } + friend constexpr auto operator!=(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return !(lhs == rhs); + } + friend constexpr auto operator>(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> bool { + return lhs.hi_ != rhs.hi_ ? 
lhs.hi_ > rhs.hi_ : lhs.lo_ > rhs.lo_; + } + friend constexpr auto operator|(const uint128_fallback& lhs, + const uint128_fallback& rhs) + -> uint128_fallback { + return {lhs.hi_ | rhs.hi_, lhs.lo_ | rhs.lo_}; + } + friend constexpr auto operator&(const uint128_fallback& lhs, + const uint128_fallback& rhs) + -> uint128_fallback { + return {lhs.hi_ & rhs.hi_, lhs.lo_ & rhs.lo_}; + } + friend auto operator+(const uint128_fallback& lhs, + const uint128_fallback& rhs) -> uint128_fallback { + auto result = uint128_fallback(lhs); + result += rhs; + return result; + } + friend auto operator*(const uint128_fallback& lhs, uint32_t rhs) + -> uint128_fallback { + FMT_ASSERT(lhs.hi_ == 0, ""); + uint64_t hi = (lhs.lo_ >> 32) * rhs; + uint64_t lo = (lhs.lo_ & ~uint32_t()) * rhs; + uint64_t new_lo = (hi << 32) + lo; + return {(hi >> 32) + (new_lo < lo ? 1 : 0), new_lo}; + } + friend auto operator-(const uint128_fallback& lhs, uint64_t rhs) + -> uint128_fallback { + return {lhs.hi_ - (lhs.lo_ < rhs ? 1 : 0), lhs.lo_ - rhs}; + } + FMT_CONSTEXPR auto operator>>(int shift) const -> uint128_fallback { + if (shift == 64) return {0, hi_}; + if (shift > 64) return uint128_fallback(0, hi_) >> (shift - 64); + return {hi_ >> shift, (hi_ << (64 - shift)) | (lo_ >> shift)}; + } + FMT_CONSTEXPR auto operator<<(int shift) const -> uint128_fallback { + if (shift == 64) return {lo_, 0}; + if (shift > 64) return uint128_fallback(lo_, 0) << (shift - 64); + return {hi_ << shift | (lo_ >> (64 - shift)), (lo_ << shift)}; + } + FMT_CONSTEXPR auto operator>>=(int shift) -> uint128_fallback& { + return *this = *this >> shift; + } + FMT_CONSTEXPR void operator+=(uint128_fallback n) { + uint64_t new_lo = lo_ + n.lo_; + uint64_t new_hi = hi_ + n.hi_ + (new_lo < lo_ ? 1 : 0); + FMT_ASSERT(new_hi >= hi_, ""); + lo_ = new_lo; + hi_ = new_hi; + } + + FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + if (is_constant_evaluated()) { + lo_ += n; + hi_ += (lo_ < n ? 
1 : 0); + return *this; + } +#if FMT_HAS_BUILTIN(__builtin_addcll) && !defined(__ibmxl__) + unsigned long long carry; + lo_ = __builtin_addcll(lo_, n, 0, &carry); + hi_ += carry; +#elif FMT_HAS_BUILTIN(__builtin_ia32_addcarryx_u64) && !defined(__ibmxl__) + unsigned long long result; + auto carry = __builtin_ia32_addcarryx_u64(0, lo_, n, &result); + lo_ = result; + hi_ += carry; +#elif defined(_MSC_VER) && defined(_M_X64) + auto carry = _addcarry_u64(0, lo_, n, &lo_); + _addcarry_u64(carry, hi_, 0, &hi_); +#else + lo_ += n; + hi_ += (lo_ < n ? 1 : 0); +#endif + return *this; + } +}; + +using uint128_t = conditional_t; + +#ifdef UINTPTR_MAX +using uintptr_t = ::uintptr_t; +#else +using uintptr_t = uint128_t; +#endif + +// Returns the largest possible value for type T. Same as +// std::numeric_limits::max() but shorter and not affected by the max macro. +template constexpr auto max_value() -> T { + return (std::numeric_limits::max)(); +} +template constexpr auto num_bits() -> int { + return std::numeric_limits::digits; +} +// std::numeric_limits::digits may return 0 for 128-bit ints. +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } + +// A heterogeneous bit_cast used for converting 96-bit long double to uint128_t +// and 128-bit pointers to uint128_fallback. 
+template sizeof(From))> +inline auto bit_cast(const From& from) -> To { + constexpr auto size = static_cast(sizeof(From) / sizeof(unsigned)); + struct data_t { + unsigned value[static_cast(size)]; + } data = bit_cast(from); + auto result = To(); + if (const_check(is_big_endian())) { + for (int i = 0; i < size; ++i) + result = (result << num_bits()) | data.value[i]; + } else { + for (int i = size - 1; i >= 0; --i) + result = (result << num_bits()) | data.value[i]; + } + return result; +} + +FMT_INLINE void assume(bool condition) { + (void)condition; +#if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION + __builtin_assume(condition); +#endif +} + +// An approximation of iterator_t for pre-C++20 systems. +template +using iterator_t = decltype(std::begin(std::declval())); +template using sentinel_t = decltype(std::end(std::declval())); + +// A workaround for std::string not having mutable data() until C++17. +template +inline auto get_data(std::basic_string& s) -> Char* { + return &s[0]; +} +template +inline auto get_data(Container& c) -> typename Container::value_type* { + return c.data(); +} + +#if defined(_SECURE_SCL) && _SECURE_SCL +// Make a checked iterator to avoid MSVC warnings. +template using checked_ptr = stdext::checked_array_iterator; +template +constexpr auto make_checked(T* p, size_t size) -> checked_ptr { + return {p, size}; +} +#else +template using checked_ptr = T*; +template constexpr auto make_checked(T* p, size_t) -> T* { + return p; +} +#endif + +// Attempts to reserve space for n extra characters in the output range. +// Returns a pointer to the reserved range or a reference to it. 
+template ::value)> +#if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION +__attribute__((no_sanitize("undefined"))) +#endif +inline auto +reserve(std::back_insert_iterator it, size_t n) + -> checked_ptr { + Container& c = get_container(it); + size_t size = c.size(); + c.resize(size + n); + return make_checked(get_data(c) + size, n); +} + +template +inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { + buffer& buf = get_container(it); + buf.try_reserve(buf.size() + n); + return it; +} + +template +constexpr auto reserve(Iterator& it, size_t) -> Iterator& { + return it; +} + +template +using reserve_iterator = + remove_reference_t(), 0))>; + +template +constexpr auto to_pointer(OutputIt, size_t) -> T* { + return nullptr; +} +template auto to_pointer(buffer_appender it, size_t n) -> T* { + buffer& buf = get_container(it); + auto size = buf.size(); + if (buf.capacity() < size + n) return nullptr; + buf.try_resize(size + n); + return buf.data() + size; +} + +template ::value)> +inline auto base_iterator(std::back_insert_iterator& it, + checked_ptr) + -> std::back_insert_iterator { + return it; +} + +template +constexpr auto base_iterator(Iterator, Iterator it) -> Iterator { + return it; +} + +// is spectacularly slow to compile in C++20 so use a simple fill_n +// instead (#1998). 
+template +FMT_CONSTEXPR auto fill_n(OutputIt out, Size count, const T& value) + -> OutputIt { + for (Size i = 0; i < count; ++i) *out++ = value; + return out; +} +template +FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { + if (is_constant_evaluated()) { + return fill_n(out, count, value); + } + std::memset(out, value, to_unsigned(count)); + return out + count; +} + +#ifdef __cpp_char8_t +using char8_type = char8_t; +#else +enum char8_type : unsigned char {}; +#endif + +template +FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, + OutputIt out) -> OutputIt { + return copy_str(begin, end, out); +} + +// A public domain branchless UTF-8 decoder by Christopher Wellons: +// https://github.com/skeeto/branchless-utf8 +/* Decode the next character, c, from s, reporting errors in e. + * + * Since this is a branchless decoder, four bytes will be read from the + * buffer regardless of the actual length of the next character. This + * means the buffer _must_ have at least three bytes of zero padding + * following the end of the data stream. + * + * Errors are reported in e, which will be non-zero if the parsed + * character was somehow invalid: invalid byte sequence, non-canonical + * encoding, or a surrogate half. + * + * The function returns a pointer to the next character. When an error + * occurs, this pointer will be a guess that depends on the particular + * error, but it will always advance at least one byte. + */ +FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) + -> const char* { + constexpr const int masks[] = {0x00, 0x7f, 0x1f, 0x0f, 0x07}; + constexpr const uint32_t mins[] = {4194304, 0, 128, 2048, 65536}; + constexpr const int shiftc[] = {0, 18, 12, 6, 0}; + constexpr const int shifte[] = {0, 6, 4, 2, 0}; + + int len = code_point_length_impl(*s); + // Compute the pointer to the next character early so that the next + // iteration can start working on the next character. 
Neither Clang + // nor GCC figure out this reordering on their own. + const char* next = s + len + !len; + + using uchar = unsigned char; + + // Assume a four-byte character and load four bytes. Unused bits are + // shifted out. + *c = uint32_t(uchar(s[0]) & masks[len]) << 18; + *c |= uint32_t(uchar(s[1]) & 0x3f) << 12; + *c |= uint32_t(uchar(s[2]) & 0x3f) << 6; + *c |= uint32_t(uchar(s[3]) & 0x3f) << 0; + *c >>= shiftc[len]; + + // Accumulate the various error conditions. + *e = (*c < mins[len]) << 6; // non-canonical encoding + *e |= ((*c >> 11) == 0x1b) << 7; // surrogate half? + *e |= (*c > 0x10FFFF) << 8; // out of range? + *e |= (uchar(s[1]) & 0xc0) >> 2; + *e |= (uchar(s[2]) & 0xc0) >> 4; + *e |= uchar(s[3]) >> 6; + *e ^= 0x2a; // top two bits of each tail byte correct? + *e >>= shifte[len]; + + return next; +} + +constexpr uint32_t invalid_code_point = ~uint32_t(); + +// Invokes f(cp, sv) for every code point cp in s with sv being the string view +// corresponding to the code point. cp is invalid_code_point on error. +template +FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { + auto decode = [f](const char* buf_ptr, const char* ptr) { + auto cp = uint32_t(); + auto error = 0; + auto end = utf8_decode(buf_ptr, &cp, &error); + bool result = f(error ? invalid_code_point : cp, + string_view(ptr, error ? 1 : to_unsigned(end - buf_ptr))); + return result ? (error ? buf_ptr + 1 : end) : nullptr; + }; + auto p = s.data(); + const size_t block_size = 4; // utf8_decode always reads blocks of 4 chars. 
+ if (s.size() >= block_size) { + for (auto end = p + s.size() - block_size + 1; p < end;) { + p = decode(p, p); + if (!p) return; + } + } + if (auto num_chars_left = s.data() + s.size() - p) { + char buf[2 * block_size - 1] = {}; + copy_str(p, p + num_chars_left, buf); + const char* buf_ptr = buf; + do { + auto end = decode(buf_ptr, p); + if (!end) return; + p += end - buf_ptr; + buf_ptr = end; + } while (buf_ptr - buf < num_chars_left); + } +} + +template +inline auto compute_width(basic_string_view s) -> size_t { + return s.size(); +} + +// Computes approximate display width of a UTF-8 string. +FMT_CONSTEXPR inline size_t compute_width(string_view s) { + size_t num_code_points = 0; + // It is not a lambda for compatibility with C++14. + struct count_code_points { + size_t* count; + FMT_CONSTEXPR auto operator()(uint32_t cp, string_view) const -> bool { + *count += detail::to_unsigned( + 1 + + (cp >= 0x1100 && + (cp <= 0x115f || // Hangul Jamo init. consonants + cp == 0x2329 || // LEFT-POINTING ANGLE BRACKET + cp == 0x232a || // RIGHT-POINTING ANGLE BRACKET + // CJK ... Yi except IDEOGRAPHIC HALF FILL SPACE: + (cp >= 0x2e80 && cp <= 0xa4cf && cp != 0x303f) || + (cp >= 0xac00 && cp <= 0xd7a3) || // Hangul Syllables + (cp >= 0xf900 && cp <= 0xfaff) || // CJK Compatibility Ideographs + (cp >= 0xfe10 && cp <= 0xfe19) || // Vertical Forms + (cp >= 0xfe30 && cp <= 0xfe6f) || // CJK Compatibility Forms + (cp >= 0xff00 && cp <= 0xff60) || // Fullwidth Forms + (cp >= 0xffe0 && cp <= 0xffe6) || // Fullwidth Forms + (cp >= 0x20000 && cp <= 0x2fffd) || // CJK + (cp >= 0x30000 && cp <= 0x3fffd) || + // Miscellaneous Symbols and Pictographs + Emoticons: + (cp >= 0x1f300 && cp <= 0x1f64f) || + // Supplemental Symbols and Pictographs: + (cp >= 0x1f900 && cp <= 0x1f9ff)))); + return true; + } + }; + // We could avoid branches by using utf8_decode directly. 
+ for_each_codepoint(s, count_code_points{&num_code_points}); + return num_code_points; +} + +inline auto compute_width(basic_string_view s) -> size_t { + return compute_width( + string_view(reinterpret_cast(s.data()), s.size())); +} + +template +inline auto code_point_index(basic_string_view s, size_t n) -> size_t { + size_t size = s.size(); + return n < size ? n : size; +} + +// Calculates the index of the nth code point in a UTF-8 string. +inline auto code_point_index(string_view s, size_t n) -> size_t { + const char* data = s.data(); + size_t num_code_points = 0; + for (size_t i = 0, size = s.size(); i != size; ++i) { + if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; + } + return s.size(); +} + +inline auto code_point_index(basic_string_view s, size_t n) + -> size_t { + return code_point_index( + string_view(reinterpret_cast(s.data()), s.size()), n); +} + +template struct is_integral : std::is_integral {}; +template <> struct is_integral : std::true_type {}; +template <> struct is_integral : std::true_type {}; + +template +using is_signed = + std::integral_constant::is_signed || + std::is_same::value>; + +template +using is_integer = + bool_constant::value && !std::is_same::value && + !std::is_same::value && + !std::is_same::value>; + +#ifndef FMT_USE_FLOAT128 +# ifdef __SIZEOF_FLOAT128__ +# define FMT_USE_FLOAT128 1 +# else +# define FMT_USE_FLOAT128 0 +# endif +#endif +#if FMT_USE_FLOAT128 +using float128 = __float128; +#else +using float128 = void; +#endif +template using is_float128 = std::is_same; + +template +using is_floating_point = + bool_constant::value || is_float128::value>; + +template ::value> +struct is_fast_float : bool_constant::is_iec559 && + sizeof(T) <= sizeof(double)> {}; +template struct is_fast_float : std::false_type {}; + +template +using is_double_double = bool_constant::digits == 106>; + +#ifndef FMT_USE_FULL_CACHE_DRAGONBOX +# define FMT_USE_FULL_CACHE_DRAGONBOX 0 +#endif + +template +template +void 
buffer::append(const U* begin, const U* end) { + while (begin != end) { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); + size_ += count; + begin += count; + } +} + +template +struct is_locale : std::false_type {}; +template +struct is_locale> : std::true_type {}; +} // namespace detail + +FMT_MODULE_EXPORT_BEGIN + +// The number of characters to store in the basic_memory_buffer object itself +// to avoid dynamic memory allocation. +enum { inline_buffer_size = 500 }; + +/** + \rst + A dynamically growing memory buffer for trivially copyable/constructible types + with the first ``SIZE`` elements stored in the object itself. + + You can use the ``memory_buffer`` type alias for ``char`` instead. + + **Example**:: + + auto out = fmt::memory_buffer(); + format_to(std::back_inserter(out), "The answer is {}.", 42); + + This will append the following output to the ``out`` object: + + .. code-block:: none + + The answer is 42. + + The output can be converted to an ``std::string`` with ``to_string(out)``. + \endrst + */ +template > +class basic_memory_buffer final : public detail::buffer { + private: + T store_[SIZE]; + + // Don't inherit from Allocator avoid generating type_info for it. + Allocator alloc_; + + // Deallocate memory allocated by the buffer. 
+ FMT_CONSTEXPR20 void deallocate() { + T* data = this->data(); + if (data != store_) alloc_.deallocate(data, this->capacity()); + } + + protected: + FMT_CONSTEXPR20 void grow(size_t size) override; + + public: + using value_type = T; + using const_reference = const T&; + + FMT_CONSTEXPR20 explicit basic_memory_buffer( + const Allocator& alloc = Allocator()) + : alloc_(alloc) { + this->set(store_, SIZE); + if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); + } + FMT_CONSTEXPR20 ~basic_memory_buffer() { deallocate(); } + + private: + // Move data from other to this buffer. + FMT_CONSTEXPR20 void move(basic_memory_buffer& other) { + alloc_ = std::move(other.alloc_); + T* data = other.data(); + size_t size = other.size(), capacity = other.capacity(); + if (data == other.store_) { + this->set(store_, capacity); + detail::copy_str(other.store_, other.store_ + size, + detail::make_checked(store_, capacity)); + } else { + this->set(data, capacity); + // Set pointer to the inline array so that delete is not called + // when deallocating. + other.set(other.store_, 0); + other.clear(); + } + this->resize(size); + } + + public: + /** + \rst + Constructs a :class:`fmt::basic_memory_buffer` object moving the content + of the other object to it. + \endrst + */ + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + move(other); + } + + /** + \rst + Moves the content of the other ``basic_memory_buffer`` object to this one. + \endrst + */ + auto operator=(basic_memory_buffer&& other) noexcept -> basic_memory_buffer& { + FMT_ASSERT(this != &other, ""); + deallocate(); + move(other); + return *this; + } + + // Returns a copy of the allocator associated with this buffer. + auto get_allocator() const -> Allocator { return alloc_; } + + /** + Resizes the buffer to contain *count* elements. If T is a POD type new + elements may not be initialized. 
+ */ + FMT_CONSTEXPR20 void resize(size_t count) { this->try_resize(count); } + + /** Increases the buffer capacity to *new_capacity*. */ + void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } + + // Directly append data into the buffer + using detail::buffer::append; + template + void append(const ContiguousRange& range) { + append(range.data(), range.data() + range.size()); + } +}; + +template +FMT_CONSTEXPR20 void basic_memory_buffer::grow( + size_t size) { + detail::abort_fuzzing_if(size > 5000); + const size_t max_size = std::allocator_traits::max_size(alloc_); + size_t old_capacity = this->capacity(); + size_t new_capacity = old_capacity + old_capacity / 2; + if (size > new_capacity) + new_capacity = size; + else if (new_capacity > max_size) + new_capacity = size > max_size ? size : max_size; + T* old_data = this->data(); + T* new_data = + std::allocator_traits::allocate(alloc_, new_capacity); + // The following code doesn't throw, so the raw pointer above doesn't leak. + std::uninitialized_copy(old_data, old_data + this->size(), + detail::make_checked(new_data, new_capacity)); + this->set(new_data, new_capacity); + // deallocate must not throw according to the standard, but even if it does, + // the buffer already uses the new storage and will deallocate it in + // destructor. + if (old_data != store_) alloc_.deallocate(old_data, old_capacity); +} + +using memory_buffer = basic_memory_buffer; + +template +struct is_contiguous> : std::true_type { +}; + +namespace detail { +#ifdef _WIN32 +FMT_API bool write_console(std::FILE* f, string_view text); +#endif +FMT_API void print(std::FILE*, string_view); +} // namespace detail + +/** An error reported from a formatting function. 
*/ +FMT_CLASS_API +class FMT_API format_error : public std::runtime_error { + public: + using std::runtime_error::runtime_error; + format_error(const format_error&) = default; + format_error& operator=(const format_error&) = default; + format_error(format_error&&) = default; + format_error& operator=(format_error&&) = default; + ~format_error() noexcept override FMT_MSC_DEFAULT; +}; + +namespace detail_exported { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template struct fixed_string { + constexpr fixed_string(const Char (&str)[N]) { + detail::copy_str(static_cast(str), + str + N, data); + } + Char data[N] = {}; +}; +#endif + +// Converts a compile-time string to basic_string_view. +template +constexpr auto compile_string_to_view(const Char (&s)[N]) + -> basic_string_view { + // Remove trailing NUL character if needed. Won't be present if this is used + // with a raw character array (i.e. not defined as a string). + return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 1 : 0)}; +} +template +constexpr auto compile_string_to_view(detail::std_string_view s) + -> basic_string_view { + return {s.data(), s.size()}; +} +} // namespace detail_exported + +class loc_value { + private: + basic_format_arg value_; + + public: + template ::value)> + loc_value(T value) : value_(detail::make_arg(value)) {} + + template ::value)> + loc_value(T) {} + + template auto visit(Visitor&& vis) -> decltype(vis(0)) { + return visit_format_arg(vis, value_); + } +}; + +// A locale facet that formats values in UTF-8. +// It is parameterized on the locale to avoid the heavy include. 
+template class format_facet : public Locale::facet { + private: + std::string separator_; + std::string grouping_; + std::string decimal_point_; + + protected: + virtual auto do_put(appender out, loc_value val, + const format_specs& specs) const -> bool; + + public: + static FMT_API typename Locale::id id; + + explicit format_facet(Locale& loc); + explicit format_facet(string_view sep = "", + std::initializer_list g = {3}, + std::string decimal_point = ".") + : separator_(sep.data(), sep.size()), + grouping_(g.begin(), g.end()), + decimal_point_(decimal_point) {} + + auto put(appender out, loc_value val, const format_specs& specs) const + -> bool { + return do_put(out, val, specs); + } +}; + +FMT_BEGIN_DETAIL_NAMESPACE + +// Returns true if value is negative, false otherwise. +// Same as `value < 0` but doesn't produce warnings if T is an unsigned type. +template ::value)> +constexpr auto is_negative(T value) -> bool { + return value < 0; +} +template ::value)> +constexpr auto is_negative(T) -> bool { + return false; +} + +template +FMT_CONSTEXPR auto is_supported_floating_point(T) -> bool { + if (std::is_same()) return FMT_USE_FLOAT; + if (std::is_same()) return FMT_USE_DOUBLE; + if (std::is_same()) return FMT_USE_LONG_DOUBLE; + return true; +} + +// Smallest of uint32_t, uint64_t, uint128_t that is large enough to +// represent all values of an integral type T. +template +using uint32_or_64_or_128_t = + conditional_t() <= 32 && !FMT_REDUCE_INT_INSTANTIATIONS, + uint32_t, + conditional_t() <= 64, uint64_t, uint128_t>>; +template +using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; + +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ + (factor)*1000000, (factor)*10000000, (factor)*100000000, \ + (factor)*1000000000 + +// Converts value in the range [0, 100) to a string. +constexpr const char* digits2(size_t value) { + // GCC generates slightly better code when value is pointer-size. 
+ return &"0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"[value * 2]; +} + +// Sign is a template parameter to workaround a bug in gcc 4.8. +template constexpr Char sign(Sign s) { +#if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 + static_assert(std::is_same::value, ""); +#endif + return static_cast("\0-+ "[s]); +} + +template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { + int count = 1; + for (;;) { + // Integer division is slow so do it for a group of four digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + if (n < 10) return count; + if (n < 100) return count + 1; + if (n < 1000) return count + 2; + if (n < 10000) return count + 3; + n /= 10000u; + count += 4; + } +} +#if FMT_USE_INT128 +FMT_CONSTEXPR inline auto count_digits(uint128_opt n) -> int { + return count_digits_fallback(n); +} +#endif + +#ifdef FMT_BUILTIN_CLZLL +// It is a separate function rather than a part of count_digits to workaround +// the lack of static constexpr in constexpr functions. +inline auto do_count_digits(uint64_t n) -> int { + // This has comparable performance to the version by Kendall Willets + // (https://github.com/fmtlib/format-benchmark/blob/master/digits10) + // but uses smaller tables. + // Maps bsr(n) to ceil(log10(pow(2, bsr(n) + 1) - 1)). 
+ static constexpr uint8_t bsr2log10[] = { + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, + 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, 20}; + auto t = bsr2log10[FMT_BUILTIN_CLZLL(n | 1) ^ 63]; + static constexpr const uint64_t zero_or_powers_of_10[] = { + 0, 0, FMT_POWERS_OF_10(1U), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; + return t - (n < zero_or_powers_of_10[t]); +} +#endif + +// Returns the number of decimal digits in n. Leading zeros are not counted +// except for n == 0 in which case count_digits returns 1. +FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) { + return do_count_digits(n); + } +#endif + return count_digits_fallback(n); +} + +// Counts the number of digits in n. BITS = log2(radix). +template +FMT_CONSTEXPR auto count_digits(UInt n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated() && num_bits() == 32) + return (FMT_BUILTIN_CLZ(static_cast(n) | 1) ^ 31) / BITS + 1; +#endif + // Lambda avoids unreachable code warnings from NVHPC. + return [](UInt m) { + int num_digits = 0; + do { + ++num_digits; + } while ((m >>= BITS) != 0); + return num_digits; + }(n); +} + +#ifdef FMT_BUILTIN_CLZ +// It is a separate function rather than a part of count_digits to workaround +// the lack of static constexpr in constexpr functions. +FMT_INLINE auto do_count_digits(uint32_t n) -> int { +// An optimization by Kendall Willets from https://bit.ly/3uOIQrB. +// This increments the upper 32 bits (log10(T) - 1) when >= T is added. 
+# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T) + static constexpr uint64_t table[] = { + FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 + FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 + FMT_INC(100), FMT_INC(100), FMT_INC(100), // 512 + FMT_INC(1000), FMT_INC(1000), FMT_INC(1000), // 4096 + FMT_INC(10000), FMT_INC(10000), FMT_INC(10000), // 32k + FMT_INC(100000), FMT_INC(100000), FMT_INC(100000), // 256k + FMT_INC(1000000), FMT_INC(1000000), FMT_INC(1000000), // 2048k + FMT_INC(10000000), FMT_INC(10000000), FMT_INC(10000000), // 16M + FMT_INC(100000000), FMT_INC(100000000), FMT_INC(100000000), // 128M + FMT_INC(1000000000), FMT_INC(1000000000), FMT_INC(1000000000), // 1024M + FMT_INC(1000000000), FMT_INC(1000000000) // 4B + }; + auto inc = table[FMT_BUILTIN_CLZ(n | 1) ^ 31]; + return static_cast((n + inc) >> 32); +} +#endif + +// Optional version of count_digits for better performance on 32-bit platforms. +FMT_CONSTEXPR20 inline auto count_digits(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) { + return do_count_digits(n); + } +#endif + return count_digits_fallback(n); +} + +template constexpr auto digits10() noexcept -> int { + return std::numeric_limits::digits10; +} +template <> constexpr auto digits10() noexcept -> int { return 38; } +template <> constexpr auto digits10() noexcept -> int { return 38; } + +template struct thousands_sep_result { + std::string grouping; + Char thousands_sep; +}; + +template +FMT_API auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result; +template +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { + auto result = thousands_sep_impl(loc); + return {result.grouping, Char(result.thousands_sep)}; +} +template <> +inline auto thousands_sep(locale_ref loc) -> thousands_sep_result { + return thousands_sep_impl(loc); +} + +template +FMT_API auto decimal_point_impl(locale_ref loc) -> Char; +template inline auto decimal_point(locale_ref loc) -> Char { + return 
Char(decimal_point_impl(loc)); +} +template <> inline auto decimal_point(locale_ref loc) -> wchar_t { + return decimal_point_impl(loc); +} + +// Compares two characters for equality. +template auto equal2(const Char* lhs, const char* rhs) -> bool { + return lhs[0] == Char(rhs[0]) && lhs[1] == Char(rhs[1]); +} +inline auto equal2(const char* lhs, const char* rhs) -> bool { + return memcmp(lhs, rhs, 2) == 0; +} + +// Copies two characters from src to dst. +template +FMT_CONSTEXPR20 FMT_INLINE void copy2(Char* dst, const char* src) { + if (!is_constant_evaluated() && sizeof(Char) == sizeof(char)) { + memcpy(dst, src, 2); + return; + } + *dst++ = static_cast(*src++); + *dst = static_cast(*src); +} + +template struct format_decimal_result { + Iterator begin; + Iterator end; +}; + +// Formats a decimal unsigned integer value writing into out pointing to a +// buffer of specified size. The caller must ensure that the buffer is large +// enough. +template +FMT_CONSTEXPR20 auto format_decimal(Char* out, UInt value, int size) + -> format_decimal_result { + FMT_ASSERT(size >= count_digits(value), "invalid digit count"); + out += size; + Char* end = out; + while (value >= 100) { + // Integer division is slow so do it for a group of two digits instead + // of for every digit. The idea comes from the talk by Alexandrescu + // "Three Optimization Tips for C++". See speed-test for a comparison. + out -= 2; + copy2(out, digits2(static_cast(value % 100))); + value /= 100; + } + if (value < 10) { + *--out = static_cast('0' + value); + return {out, end}; + } + out -= 2; + copy2(out, digits2(static_cast(value))); + return {out, end}; +} + +template >::value)> +FMT_CONSTEXPR inline auto format_decimal(Iterator out, UInt value, int size) + -> format_decimal_result { + // Buffer is large enough to hold all digits (digits10 + 1). 
+ Char buffer[digits10() + 1] = {}; + auto end = format_decimal(buffer, value, size).end; + return {out, detail::copy_str_noinline(buffer, end, out)}; +} + +template +FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, + bool upper = false) -> Char* { + buffer += num_digits; + Char* end = buffer; + do { + const char* digits = upper ? "0123456789ABCDEF" : "0123456789abcdef"; + unsigned digit = static_cast(value & ((1 << BASE_BITS) - 1)); + *--buffer = static_cast(BASE_BITS < 4 ? static_cast('0' + digit) + : digits[digit]); + } while ((value >>= BASE_BITS) != 0); + return end; +} + +template +inline auto format_uint(It out, UInt value, int num_digits, bool upper = false) + -> It { + if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { + format_uint(ptr, value, num_digits, upper); + return out; + } + // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). + char buffer[num_bits() / BASE_BITS + 1]; + format_uint(buffer, value, num_digits, upper); + return detail::copy_str_noinline(buffer, buffer + num_digits, out); +} + +// A converter from UTF-8 to UTF-16. +class utf8_to_utf16 { + private: + basic_memory_buffer buffer_; + + public: + FMT_API explicit utf8_to_utf16(string_view s); + operator basic_string_view() const { return {&buffer_[0], size()}; } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const wchar_t* { return &buffer_[0]; } + auto str() const -> std::wstring { return {&buffer_[0], size()}; } +}; + +namespace dragonbox { + +// Type-specific information that Dragonbox uses. 
+template struct float_info; + +template <> struct float_info { + using carrier_uint = uint32_t; + static const int exponent_bits = 8; + static const int kappa = 1; + static const int big_divisor = 100; + static const int small_divisor = 10; + static const int min_k = -31; + static const int max_k = 46; + static const int shorter_interval_tie_lower_threshold = -35; + static const int shorter_interval_tie_upper_threshold = -35; +}; + +template <> struct float_info { + using carrier_uint = uint64_t; + static const int exponent_bits = 11; + static const int kappa = 2; + static const int big_divisor = 1000; + static const int small_divisor = 100; + static const int min_k = -292; + static const int max_k = 326; + static const int shorter_interval_tie_lower_threshold = -77; + static const int shorter_interval_tie_upper_threshold = -77; +}; + +// An 80- or 128-bit floating point number. +template +struct float_info::digits == 64 || + std::numeric_limits::digits == 113 || + is_float128::value>> { + using carrier_uint = detail::uint128_t; + static const int exponent_bits = 15; +}; + +// A double-double floating point number. +template +struct float_info::value>> { + using carrier_uint = detail::uint128_t; +}; + +template struct decimal_fp { + using significand_type = typename float_info::carrier_uint; + significand_type significand; + int exponent; +}; + +template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; +} // namespace dragonbox + +// Returns true iff Float has the implicit bit which is not stored. +template constexpr bool has_implicit_bit() { + // An 80-bit FP number has a 64-bit significand an no implicit bit. + return std::numeric_limits::digits != 64; +} + +// Returns the number of significand bits stored in Float. The implicit bit is +// not counted since it is not stored. +template constexpr int num_significand_bits() { + // std::numeric_limits may not support __float128. + return is_float128() ? 
112 + : (std::numeric_limits::digits - + (has_implicit_bit() ? 1 : 0)); +} + +template +constexpr auto exponent_mask() -> + typename dragonbox::float_info::carrier_uint { + using uint = typename dragonbox::float_info::carrier_uint; + return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) + << num_significand_bits(); +} +template constexpr auto exponent_bias() -> int { + // std::numeric_limits may not support __float128. + return is_float128() ? 16383 + : std::numeric_limits::max_exponent - 1; +} + +// Writes the exponent exp in the form "[+-]d{2,3}" to buffer. +template +FMT_CONSTEXPR auto write_exponent(int exp, It it) -> It { + FMT_ASSERT(-10000 < exp && exp < 10000, "exponent out of range"); + if (exp < 0) { + *it++ = static_cast('-'); + exp = -exp; + } else { + *it++ = static_cast('+'); + } + if (exp >= 100) { + const char* top = digits2(to_unsigned(exp / 100)); + if (exp >= 1000) *it++ = static_cast(top[0]); + *it++ = static_cast(top[1]); + exp %= 100; + } + const char* d = digits2(to_unsigned(exp)); + *it++ = static_cast(d[0]); + *it++ = static_cast(d[1]); + return it; +} + +// A floating-point number f * pow(2, e) where F is an unsigned type. +template struct basic_fp { + F f; + int e; + + static constexpr const int num_significand_bits = + static_cast(sizeof(F) * num_bits()); + + constexpr basic_fp() : f(0), e(0) {} + constexpr basic_fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {} + + // Constructs fp from an IEEE754 floating-point number. + template FMT_CONSTEXPR basic_fp(Float n) { assign(n); } + + // Assigns n to this and return true iff predecessor is closer than successor. + template ::value)> + FMT_CONSTEXPR auto assign(Float n) -> bool { + static_assert(std::numeric_limits::digits <= 113, "unsupported FP"); + // Assume Float is in the format [sign][exponent][significand]. 
+ using carrier_uint = typename dragonbox::float_info::carrier_uint; + const auto num_float_significand_bits = + detail::num_significand_bits(); + const auto implicit_bit = carrier_uint(1) << num_float_significand_bits; + const auto significand_mask = implicit_bit - 1; + auto u = bit_cast(n); + f = static_cast(u & significand_mask); + auto biased_e = static_cast((u & exponent_mask()) >> + num_float_significand_bits); + // The predecessor is closer if n is a normalized power of 2 (f == 0) + // other than the smallest normalized number (biased_e > 1). + auto is_predecessor_closer = f == 0 && biased_e > 1; + if (biased_e == 0) + biased_e = 1; // Subnormals use biased exponent 1 (min exponent). + else if (has_implicit_bit()) + f += static_cast(implicit_bit); + e = biased_e - exponent_bias() - num_float_significand_bits; + if (!has_implicit_bit()) ++e; + return is_predecessor_closer; + } + + template ::value)> + FMT_CONSTEXPR auto assign(Float n) -> bool { + static_assert(std::numeric_limits::is_iec559, "unsupported FP"); + return assign(static_cast(n)); + } +}; + +using fp = basic_fp; + +// Normalizes the value converted from double and multiplied by (1 << SHIFT). +template +FMT_CONSTEXPR basic_fp normalize(basic_fp value) { + // Handle subnormals. + const auto implicit_bit = F(1) << num_significand_bits(); + const auto shifted_implicit_bit = implicit_bit << SHIFT; + while ((value.f & shifted_implicit_bit) == 0) { + value.f <<= 1; + --value.e; + } + // Subtract 1 to account for hidden bit. + const auto offset = basic_fp::num_significand_bits - + num_significand_bits() - SHIFT - 1; + value.f <<= offset; + value.e -= offset; + return value; +} + +// Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. +FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +#if FMT_USE_INT128 + auto product = static_cast<__uint128_t>(lhs) * rhs; + auto f = static_cast(product >> 64); + return (static_cast(product) & (1ULL << 63)) != 0 ? 
f + 1 : f; +#else + // Multiply 32-bit parts of significands. + uint64_t mask = (1ULL << 32) - 1; + uint64_t a = lhs >> 32, b = lhs & mask; + uint64_t c = rhs >> 32, d = rhs & mask; + uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d; + // Compute mid 64-bit of result and round. + uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31); + return ac + (ad >> 32) + (bc >> 32) + (mid >> 32); +#endif +} + +FMT_CONSTEXPR inline fp operator*(fp x, fp y) { + return {multiply(x.f, y.f), x.e + y.e + 64}; +} + +template struct basic_data { + // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. + // These are generated by support/compute-powers.py. + static constexpr uint64_t pow10_significands[87] = { + 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, + 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, + 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, + 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, + 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, + 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, + 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, + 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, + 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, + 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, + 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, + 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, + 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, + 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, + 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, + 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, + 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, + 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, + 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, + 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, + 
0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, + 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, + 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, + 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, + 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, + 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, + 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, + 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, + 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, + }; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wnarrowing" +#endif + // Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding + // to significands above. + static constexpr int16_t pow10_exponents[87] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, + -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, + -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, + -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, + -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, + 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, + 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, + 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +# pragma GCC diagnostic pop +#endif + + static constexpr uint64_t power_of_10_64[20] = { + 1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL), + 10000000000000000000ULL}; +}; + +#if FMT_CPLUSPLUS < 201703L +template constexpr uint64_t basic_data::pow10_significands[]; +template constexpr int16_t basic_data::pow10_exponents[]; +template constexpr uint64_t basic_data::power_of_10_64[]; +#endif + +// This is a struct rather than an alias to avoid shadowing warnings in gcc. 
+struct data : basic_data<> {}; + +// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its +// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. +FMT_CONSTEXPR inline fp get_cached_power(int min_exponent, + int& pow10_exponent) { + const int shift = 32; + // log10(2) = 0x0.4d104d427de7fbcc... + const int64_t significand = 0x4d104d427de7fbcc; + int index = static_cast( + ((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) + + ((int64_t(1) << shift) - 1)) // ceil + >> 32 // arithmetic shift + ); + // Decimal exponent of the first (smallest) cached power of 10. + const int first_dec_exp = -348; + // Difference between 2 consecutive decimal exponents in cached powers of 10. + const int dec_exp_step = 8; + index = (index - first_dec_exp - 1) / dec_exp_step + 1; + pow10_exponent = first_dec_exp + index * dec_exp_step; + // Using *(x + index) instead of x[index] avoids an issue with some compilers + // using the EDG frontend (e.g. nvhpc/22.3 in C++17 mode). + return {*(data::pow10_significands + index), + *(data::pow10_exponents + index)}; +} + +#ifndef _MSC_VER +# define FMT_SNPRINTF snprintf +#else +FMT_API auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) -> int; +# define FMT_SNPRINTF fmt_snprintf +#endif // _MSC_VER + +// Formats a floating-point number with snprintf using the hexfloat format. +template +auto snprintf_float(T value, int precision, float_specs specs, + buffer& buf) -> int { + // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. + FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); + FMT_ASSERT(specs.format == float_format::hex, ""); + static_assert(!std::is_same::value, ""); + + // Build the format string. + char format[7]; // The longest format is "%#.*Le". 
+ char* format_ptr = format; + *format_ptr++ = '%'; + if (specs.showpoint) *format_ptr++ = '#'; + if (precision >= 0) { + *format_ptr++ = '.'; + *format_ptr++ = '*'; + } + if (std::is_same()) *format_ptr++ = 'L'; + *format_ptr++ = specs.upper ? 'A' : 'a'; + *format_ptr = '\0'; + + // Format using snprintf. + auto offset = buf.size(); + for (;;) { + auto begin = buf.data() + offset; + auto capacity = buf.capacity() - offset; + abort_fuzzing_if(precision > 100000); + // Suppress the warning about a nonliteral format string. + // Cannot use auto because of a bug in MinGW (#1532). + int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; + int result = precision >= 0 + ? snprintf_ptr(begin, capacity, format, precision, value) + : snprintf_ptr(begin, capacity, format, value); + if (result < 0) { + // The buffer will grow exponentially. + buf.try_reserve(buf.capacity() + 1); + continue; + } + auto size = to_unsigned(result); + // Size equal to capacity means that the last character was truncated. + if (size < capacity) { + buf.try_resize(size + offset); + return 0; + } + buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. + } +} + +template +using convert_float_result = + conditional_t::value || + std::numeric_limits::digits == + std::numeric_limits::digits, + double, T>; + +template +constexpr auto convert_float(T value) -> convert_float_result { + return static_cast>(value); +} + +template +FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, + const fill_t& fill) -> OutputIt { + auto fill_size = fill.size(); + if (fill_size == 1) return detail::fill_n(it, n, fill[0]); + auto data = fill.data(); + for (size_t i = 0; i < n; ++i) + it = copy_str(data, data + fill_size, it); + return it; +} + +// Writes the output of f, padded according to format specifications in specs. +// size: output size in code units. +// width: output display width in (terminal) column positions. 
+template +FMT_CONSTEXPR auto write_padded(OutputIt out, + const basic_format_specs& specs, + size_t size, size_t width, F&& f) -> OutputIt { + static_assert(align == align::left || align == align::right, ""); + unsigned spec_width = to_unsigned(specs.width); + size_t padding = spec_width > width ? spec_width - width : 0; + // Shifts are encoded as string literals because static constexpr is not + // supported in constexpr functions. + auto* shifts = align == align::left ? "\x1f\x1f\x00\x01" : "\x00\x1f\x00\x01"; + size_t left_padding = padding >> shifts[specs.align]; + size_t right_padding = padding - left_padding; + auto it = reserve(out, size + padding * specs.fill.size()); + if (left_padding != 0) it = fill(it, left_padding, specs.fill); + it = f(it); + if (right_padding != 0) it = fill(it, right_padding, specs.fill); + return base_iterator(out, it); +} + +template +constexpr auto write_padded(OutputIt out, const basic_format_specs& specs, + size_t size, F&& f) -> OutputIt { + return write_padded(out, specs, size, size, f); +} + +template +FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, + const basic_format_specs& specs) + -> OutputIt { + return write_padded( + out, specs, bytes.size(), [bytes](reserve_iterator it) { + const char* data = bytes.data(); + return copy_str(data, data + bytes.size(), it); + }); +} + +template +auto write_ptr(OutputIt out, UIntPtr value, + const basic_format_specs* specs) -> OutputIt { + int num_digits = count_digits<4>(value); + auto size = to_unsigned(num_digits) + size_t(2); + auto write = [=](reserve_iterator it) { + *it++ = static_cast('0'); + *it++ = static_cast('x'); + return format_uint<4, Char>(it, value, num_digits); + }; + return specs ? write_padded(out, *specs, size, write) + : base_iterator(out, write(reserve(out, size))); +} + +// Returns true iff the code point cp is printable. 
+FMT_API auto is_printable(uint32_t cp) -> bool; + +inline auto needs_escape(uint32_t cp) -> bool { + return cp < 0x20 || cp == 0x7f || cp == '"' || cp == '\\' || + !is_printable(cp); +} + +template struct find_escape_result { + const Char* begin; + const Char* end; + uint32_t cp; +}; + +template +using make_unsigned_char = + typename conditional_t::value, + std::make_unsigned, + type_identity>::type; + +template +auto find_escape(const Char* begin, const Char* end) + -> find_escape_result { + for (; begin != end; ++begin) { + uint32_t cp = static_cast>(*begin); + if (const_check(sizeof(Char) == 1) && cp >= 0x80) continue; + if (needs_escape(cp)) return {begin, begin + 1, cp}; + } + return {begin, nullptr, 0}; +} + +inline auto find_escape(const char* begin, const char* end) + -> find_escape_result { + if (!is_utf8()) return find_escape(begin, end); + auto result = find_escape_result{end, nullptr, 0}; + for_each_codepoint(string_view(begin, to_unsigned(end - begin)), + [&](uint32_t cp, string_view sv) { + if (needs_escape(cp)) { + result = {sv.begin(), sv.end(), cp}; + return false; + } + return true; + }); + return result; +} + +#define FMT_STRING_IMPL(s, base, explicit) \ + [] { \ + /* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \ + /* Use a macro-like name to avoid shadowing warnings. */ \ + struct FMT_GCC_VISIBILITY_HIDDEN FMT_COMPILE_STRING : base { \ + using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t; \ + FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \ + operator fmt::basic_string_view() const { \ + return fmt::detail_exported::compile_string_to_view(s); \ + } \ + }; \ + return FMT_COMPILE_STRING(); \ + }() + +/** + \rst + Constructs a compile-time format string from a string literal *s*. + + **Example**:: + + // A compile-time error because 'd' is an invalid specifier for strings. 
+ std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); + \endrst + */ +#define FMT_STRING(s) FMT_STRING_IMPL(s, fmt::detail::compile_string, ) + +template +auto write_codepoint(OutputIt out, char prefix, uint32_t cp) -> OutputIt { + *out++ = static_cast('\\'); + *out++ = static_cast(prefix); + Char buf[width]; + fill_n(buf, width, static_cast('0')); + format_uint<4>(buf, cp, width); + return copy_str(buf, buf + width, out); +} + +template +auto write_escaped_cp(OutputIt out, const find_escape_result& escape) + -> OutputIt { + auto c = static_cast(escape.cp); + switch (escape.cp) { + case '\n': + *out++ = static_cast('\\'); + c = static_cast('n'); + break; + case '\r': + *out++ = static_cast('\\'); + c = static_cast('r'); + break; + case '\t': + *out++ = static_cast('\\'); + c = static_cast('t'); + break; + case '"': + FMT_FALLTHROUGH; + case '\'': + FMT_FALLTHROUGH; + case '\\': + *out++ = static_cast('\\'); + break; + default: + if (is_utf8()) { + if (escape.cp < 0x100) { + return write_codepoint<2, Char>(out, 'x', escape.cp); + } + if (escape.cp < 0x10000) { + return write_codepoint<4, Char>(out, 'u', escape.cp); + } + if (escape.cp < 0x110000) { + return write_codepoint<8, Char>(out, 'U', escape.cp); + } + } + for (Char escape_char : basic_string_view( + escape.begin, to_unsigned(escape.end - escape.begin))) { + out = write_codepoint<2, Char>(out, 'x', + static_cast(escape_char) & 0xFF); + } + return out; + } + *out++ = c; + return out; +} + +template +auto write_escaped_string(OutputIt out, basic_string_view str) + -> OutputIt { + *out++ = static_cast('"'); + auto begin = str.begin(), end = str.end(); + do { + auto escape = find_escape(begin, end); + out = copy_str(begin, escape.begin, out); + begin = escape.end; + if (!begin) break; + out = write_escaped_cp(out, escape); + } while (begin != end); + *out++ = static_cast('"'); + return out; +} + +template +auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + *out++ = static_cast('\''); + if 
((needs_escape(static_cast(v)) && v != static_cast('"')) || + v == static_cast('\'')) { + out = write_escaped_cp( + out, find_escape_result{&v, &v + 1, static_cast(v)}); + } else { + *out++ = v; + } + *out++ = static_cast('\''); + return out; +} + +template +FMT_CONSTEXPR auto write_char(OutputIt out, Char value, + const basic_format_specs& specs) + -> OutputIt { + bool is_debug = specs.type == presentation_type::debug; + return write_padded(out, specs, 1, [=](reserve_iterator it) { + if (is_debug) return write_escaped_char(it, value); + *it++ = value; + return it; + }); +} +template +FMT_CONSTEXPR auto write(OutputIt out, Char value, + const basic_format_specs& specs, + locale_ref loc = {}) -> OutputIt { + return check_char_specs(specs) + ? write_char(out, value, specs) + : write(out, static_cast(value), specs, loc); +} + +// Data for write_int that doesn't depend on output iterator type. It is used to +// avoid template code bloat. +template struct write_int_data { + size_t size; + size_t padding; + + FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix, + const basic_format_specs& specs) + : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) { + if (specs.align == align::numeric) { + auto width = to_unsigned(specs.width); + if (width > size) { + padding = width - size; + size = width; + } + } else if (specs.precision > num_digits) { + size = (prefix >> 24) + to_unsigned(specs.precision); + padding = to_unsigned(specs.precision - num_digits); + } + } +}; + +// Writes an integer in the format +// +// where are written by write_digits(it). +// prefix contains chars in three lower bytes and the size in the fourth byte. +template +FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, + unsigned prefix, + const basic_format_specs& specs, + W write_digits) -> OutputIt { + // Slightly faster check for specs.width == 0 && specs.precision == -1. 
+ if ((specs.width | (specs.precision + 1)) == 0) { + auto it = reserve(out, to_unsigned(num_digits) + (prefix >> 24)); + if (prefix != 0) { + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + } + return base_iterator(out, write_digits(it)); + } + auto data = write_int_data(num_digits, prefix, specs); + return write_padded( + out, specs, data.size, [=](reserve_iterator it) { + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + it = detail::fill_n(it, data.padding, static_cast('0')); + return write_digits(it); + }); +} + +template class digit_grouping { + private: + std::string grouping_; + std::basic_string thousands_sep_; + + struct next_state { + std::string::const_iterator group; + int pos; + }; + next_state initial_state() const { return {grouping_.begin(), 0}; } + + // Returns the next digit group separator position. + int next(next_state& state) const { + if (thousands_sep_.empty()) return max_value(); + if (state.group == grouping_.end()) return state.pos += grouping_.back(); + if (*state.group <= 0 || *state.group == max_value()) + return max_value(); + state.pos += *state.group++; + return state.pos; + } + + public: + explicit digit_grouping(locale_ref loc, bool localized = true) { + if (!localized) return; + auto sep = thousands_sep(loc); + grouping_ = sep.grouping; + if (sep.thousands_sep) thousands_sep_.assign(1, sep.thousands_sep); + } + digit_grouping(std::string grouping, std::basic_string sep) + : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} + + bool has_separator() const { return !thousands_sep_.empty(); } + + int count_separators(int num_digits) const { + int count = 0; + auto state = initial_state(); + while (num_digits > next(state)) ++count; + return count; + } + + // Applies grouping to digits and write the output to out. 
+ template + Out apply(Out out, basic_string_view digits) const { + auto num_digits = static_cast(digits.size()); + auto separators = basic_memory_buffer(); + separators.push_back(0); + auto state = initial_state(); + while (int i = next(state)) { + if (i >= num_digits) break; + separators.push_back(i); + } + for (int i = 0, sep_index = static_cast(separators.size() - 1); + i < num_digits; ++i) { + if (num_digits - i == separators[sep_index]) { + out = + copy_str(thousands_sep_.data(), + thousands_sep_.data() + thousands_sep_.size(), out); + --sep_index; + } + *out++ = static_cast(digits[to_unsigned(i)]); + } + return out; + } +}; + +// Writes a decimal integer with digit grouping. +template +auto write_int(OutputIt out, UInt value, unsigned prefix, + const basic_format_specs& specs, + const digit_grouping& grouping) -> OutputIt { + static_assert(std::is_same, UInt>::value, ""); + int num_digits = count_digits(value); + char digits[40]; + format_decimal(digits, value, num_digits); + unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits + + grouping.count_separators(num_digits)); + return write_padded( + out, specs, size, size, [&](reserve_iterator it) { + if (prefix != 0) { + char sign = static_cast(prefix); + *it++ = static_cast(sign); + } + return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + }); +} + +// Writes a localized value. +FMT_API auto write_loc(appender out, loc_value value, const format_specs& specs, + locale_ref loc) -> bool; +template +inline auto write_loc(OutputIt, loc_value, const basic_format_specs&, + locale_ref) -> bool { + return false; +} + +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 
1 : 0)) << 24; +} + +template struct write_int_arg { + UInt abs_value; + unsigned prefix; +}; + +template +FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign) + -> write_int_arg> { + auto prefix = 0u; + auto abs_value = static_cast>(value); + if (is_negative(value)) { + prefix = 0x01000000 | '-'; + abs_value = 0 - abs_value; + } else { + constexpr const unsigned prefixes[4] = {0, 0, 0x1000000u | '+', + 0x1000000u | ' '}; + prefix = prefixes[sign]; + } + return {abs_value, prefix}; +} + +template struct loc_writer { + buffer_appender out; + const basic_format_specs& specs; + std::basic_string sep; + std::string grouping; + std::basic_string decimal_point; + + template ::value)> + auto operator()(T value) -> bool { + auto arg = make_write_int_arg(value, specs.sign); + write_int(out, static_cast>(arg.abs_value), arg.prefix, + specs, digit_grouping(grouping, sep)); + return true; + } + + template ::value)> + auto operator()(T) -> bool { + return false; + } + + auto operator()(...) -> bool { return false; } +}; + +template +FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg arg, + const basic_format_specs& specs, + locale_ref) -> OutputIt { + static_assert(std::is_same>::value, ""); + auto abs_value = arg.abs_value; + auto prefix = arg.prefix; + switch (specs.type) { + case presentation_type::none: + case presentation_type::dec: { + auto num_digits = count_digits(abs_value); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_decimal(it, abs_value, num_digits).end; + }); + } + case presentation_type::hex_lower: + case presentation_type::hex_upper: { + bool upper = specs.type == presentation_type::hex_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 
'X' : 'x') << 8 | '0'); + int num_digits = count_digits<4>(abs_value); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<4, Char>(it, abs_value, num_digits, upper); + }); + } + case presentation_type::bin_lower: + case presentation_type::bin_upper: { + bool upper = specs.type == presentation_type::bin_upper; + if (specs.alt) + prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); + int num_digits = count_digits<1>(abs_value); + return write_int(out, num_digits, prefix, specs, + [=](reserve_iterator it) { + return format_uint<1, Char>(it, abs_value, num_digits); + }); + } + case presentation_type::oct: { + int num_digits = count_digits<3>(abs_value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && abs_value != 0) + prefix_append(prefix, '0'); + return write_int(out, num_digits, prefix, specs, + [=](reserve_iterator it) { + return format_uint<3, Char>(it, abs_value, num_digits); + }); + } + case presentation_type::chr: + return write_char(out, static_cast(abs_value), specs); + default: + throw_format_error("invalid type specifier"); + } + return out; +} +template +FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline( + OutputIt out, write_int_arg arg, const basic_format_specs& specs, + locale_ref loc) -> OutputIt { + return write_int(out, arg, specs, loc); +} +template ::value && + !std::is_same::value && + std::is_same>::value)> +FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, + const basic_format_specs& specs, + locale_ref loc) -> OutputIt { + if (specs.localized && write_loc(out, value, specs, loc)) return out; + return write_int_noinline(out, make_write_int_arg(value, specs.sign), specs, + loc); +} +// An inlined version of write used in format string compilation. 
+template ::value && + !std::is_same::value && + !std::is_same>::value)> +FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, + const basic_format_specs& specs, + locale_ref loc) -> OutputIt { + if (specs.localized && write_loc(out, value, specs, loc)) return out; + return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); +} + +// An output iterator that counts the number of objects written to it and +// discards them. +class counting_iterator { + private: + size_t count_; + + public: + using iterator_category = std::output_iterator_tag; + using difference_type = std::ptrdiff_t; + using pointer = void; + using reference = void; + FMT_UNCHECKED_ITERATOR(counting_iterator); + + struct value_type { + template FMT_CONSTEXPR void operator=(const T&) {} + }; + + FMT_CONSTEXPR counting_iterator() : count_(0) {} + + FMT_CONSTEXPR size_t count() const { return count_; } + + FMT_CONSTEXPR counting_iterator& operator++() { + ++count_; + return *this; + } + FMT_CONSTEXPR counting_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, + difference_type n) { + it.count_ += static_cast(n); + return it; + } + + FMT_CONSTEXPR value_type operator*() const { return {}; } +}; + +template +FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, + const basic_format_specs& specs) -> OutputIt { + auto data = s.data(); + auto size = s.size(); + if (specs.precision >= 0 && to_unsigned(specs.precision) < size) + size = code_point_index(s, to_unsigned(specs.precision)); + bool is_debug = specs.type == presentation_type::debug; + size_t width = 0; + if (specs.width != 0) { + if (is_debug) + width = write_escaped_string(counting_iterator{}, s).count(); + else + width = compute_width(basic_string_view(data, size)); + } + return write_padded(out, specs, size, width, + [=](reserve_iterator it) { + if (is_debug) return write_escaped_string(it, s); + return copy_str(data, data + 
size, it); + }); +} +template +FMT_CONSTEXPR auto write(OutputIt out, + basic_string_view> s, + const basic_format_specs& specs, locale_ref) + -> OutputIt { + check_string_type_spec(specs.type); + return write(out, s, specs); +} +template +FMT_CONSTEXPR auto write(OutputIt out, const Char* s, + const basic_format_specs& specs, locale_ref) + -> OutputIt { + return check_cstring_type_spec(specs.type) + ? write(out, basic_string_view(s), specs, {}) + : write_ptr(out, bit_cast(s), &specs); +} + +template ::value && + !std::is_same::value && + !std::is_same::value)> +FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { + auto abs_value = static_cast>(value); + bool negative = is_negative(value); + // Don't do -abs_value since it trips unsigned-integer-overflow sanitizer. + if (negative) abs_value = ~abs_value + 1; + int num_digits = count_digits(abs_value); + auto size = (negative ? 1 : 0) + static_cast(num_digits); + auto it = reserve(out, size); + if (auto ptr = to_pointer(it, size)) { + if (negative) *ptr++ = static_cast('-'); + format_decimal(ptr, abs_value, num_digits); + return out; + } + if (negative) *it++ = static_cast('-'); + it = format_decimal(it, abs_value, num_digits).end; + return base_iterator(out, it); +} + +template +FMT_CONSTEXPR20 auto write_nonfinite(OutputIt out, bool isnan, + basic_format_specs specs, + const float_specs& fspecs) -> OutputIt { + auto str = + isnan ? (fspecs.upper ? "NAN" : "nan") : (fspecs.upper ? "INF" : "inf"); + constexpr size_t str_size = 3; + auto sign = fspecs.sign; + auto size = str_size + (sign ? 1 : 0); + // Replace '0'-padding with space for non-finite values. 
+ const bool is_zero_fill = + specs.fill.size() == 1 && *specs.fill.data() == static_cast('0'); + if (is_zero_fill) specs.fill[0] = static_cast(' '); + return write_padded(out, specs, size, [=](reserve_iterator it) { + if (sign) *it++ = detail::sign(sign); + return copy_str(str, str + str_size, it); + }); +} + +// A decimal floating-point number significand * pow(10, exp). +struct big_decimal_fp { + const char* significand; + int significand_size; + int exponent; +}; + +constexpr auto get_significand_size(const big_decimal_fp& f) -> int { + return f.significand_size; +} +template +inline auto get_significand_size(const dragonbox::decimal_fp& f) -> int { + return count_digits(f.significand); +} + +template +constexpr auto write_significand(OutputIt out, const char* significand, + int significand_size) -> OutputIt { + return copy_str(significand, significand + significand_size, out); +} +template +inline auto write_significand(OutputIt out, UInt significand, + int significand_size) -> OutputIt { + return format_decimal(out, significand, significand_size).end; +} +template +FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand, + int significand_size, int exponent, + const Grouping& grouping) -> OutputIt { + if (!grouping.has_separator()) { + out = write_significand(out, significand, significand_size); + return detail::fill_n(out, exponent, static_cast('0')); + } + auto buffer = memory_buffer(); + write_significand(appender(buffer), significand, significand_size); + detail::fill_n(appender(buffer), exponent, '0'); + return grouping.apply(out, string_view(buffer.data(), buffer.size())); +} + +template ::value)> +inline auto write_significand(Char* out, UInt significand, int significand_size, + int integral_size, Char decimal_point) -> Char* { + if (!decimal_point) + return format_decimal(out, significand, significand_size).end; + out += significand_size + 1; + Char* end = out; + int floating_size = significand_size - integral_size; + for (int i = 
floating_size / 2; i > 0; --i) { + out -= 2; + copy2(out, digits2(static_cast(significand % 100))); + significand /= 100; + } + if (floating_size % 2 != 0) { + *--out = static_cast('0' + significand % 10); + significand /= 10; + } + *--out = decimal_point; + format_decimal(out - integral_size, significand, integral_size); + return end; +} + +template >::value)> +inline auto write_significand(OutputIt out, UInt significand, + int significand_size, int integral_size, + Char decimal_point) -> OutputIt { + // Buffer is large enough to hold digits (digits10 + 1) and a decimal point. + Char buffer[digits10() + 2]; + auto end = write_significand(buffer, significand, significand_size, + integral_size, decimal_point); + return detail::copy_str_noinline(buffer, end, out); +} + +template +FMT_CONSTEXPR auto write_significand(OutputIt out, const char* significand, + int significand_size, int integral_size, + Char decimal_point) -> OutputIt { + out = detail::copy_str_noinline(significand, + significand + integral_size, out); + if (!decimal_point) return out; + *out++ = decimal_point; + return detail::copy_str_noinline(significand + integral_size, + significand + significand_size, out); +} + +template +FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand, + int significand_size, int integral_size, + Char decimal_point, + const Grouping& grouping) -> OutputIt { + if (!grouping.has_separator()) { + return write_significand(out, significand, significand_size, integral_size, + decimal_point); + } + auto buffer = basic_memory_buffer(); + write_significand(buffer_appender(buffer), significand, + significand_size, integral_size, decimal_point); + grouping.apply( + out, basic_string_view(buffer.data(), to_unsigned(integral_size))); + return detail::copy_str_noinline(buffer.data() + integral_size, + buffer.end(), out); +} + +template > +FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, + const basic_format_specs& specs, + float_specs fspecs, locale_ref 
loc) + -> OutputIt { + auto significand = f.significand; + int significand_size = get_significand_size(f); + const Char zero = static_cast('0'); + auto sign = fspecs.sign; + size_t size = to_unsigned(significand_size) + (sign ? 1 : 0); + using iterator = reserve_iterator; + + Char decimal_point = + fspecs.locale ? detail::decimal_point(loc) : static_cast('.'); + + int output_exp = f.exponent + significand_size - 1; + auto use_exp_format = [=]() { + if (fspecs.format == float_format::exp) return true; + if (fspecs.format != float_format::general) return false; + // Use the fixed notation if the exponent is in [exp_lower, exp_upper), + // e.g. 0.0001 instead of 1e-04. Otherwise use the exponent notation. + const int exp_lower = -4, exp_upper = 16; + return output_exp < exp_lower || + output_exp >= (fspecs.precision > 0 ? fspecs.precision : exp_upper); + }; + if (use_exp_format()) { + int num_zeros = 0; + if (fspecs.showpoint) { + num_zeros = fspecs.precision - significand_size; + if (num_zeros < 0) num_zeros = 0; + size += to_unsigned(num_zeros); + } else if (significand_size == 1) { + decimal_point = Char(); + } + auto abs_output_exp = output_exp >= 0 ? output_exp : -output_exp; + int exp_digits = 2; + if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; + + size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); + char exp_char = fspecs.upper ? 'E' : 'e'; + auto write = [=](iterator it) { + if (sign) *it++ = detail::sign(sign); + // Insert a decimal point after the first digit and add an exponent. + it = write_significand(it, significand, significand_size, 1, + decimal_point); + if (num_zeros > 0) it = detail::fill_n(it, num_zeros, zero); + *it++ = static_cast(exp_char); + return write_exponent(output_exp, it); + }; + return specs.width > 0 ? 
write_padded(out, specs, size, write) + : base_iterator(out, write(reserve(out, size))); + } + + int exp = f.exponent + significand_size; + if (f.exponent >= 0) { + // 1234e5 -> 123400000[.0+] + size += to_unsigned(f.exponent); + int num_zeros = fspecs.precision - exp; + abort_fuzzing_if(num_zeros > 5000); + if (fspecs.showpoint) { + ++size; + if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; + if (num_zeros > 0) size += to_unsigned(num_zeros); + } + auto grouping = Grouping(loc, fspecs.locale); + size += to_unsigned(grouping.count_separators(exp)); + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = detail::sign(sign); + it = write_significand(it, significand, significand_size, + f.exponent, grouping); + if (!fspecs.showpoint) return it; + *it++ = decimal_point; + return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; + }); + } else if (exp > 0) { + // 1234e-2 -> 12.34[0+] + int num_zeros = fspecs.showpoint ? fspecs.precision - significand_size : 0; + size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0); + auto grouping = Grouping(loc, fspecs.locale); + size += to_unsigned(grouping.count_separators(significand_size)); + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = detail::sign(sign); + it = write_significand(it, significand, significand_size, exp, + decimal_point, grouping); + return num_zeros > 0 ? detail::fill_n(it, num_zeros, zero) : it; + }); + } + // 1234e-6 -> 0.001234 + int num_zeros = -exp; + if (significand_size == 0 && fspecs.precision >= 0 && + fspecs.precision < num_zeros) { + num_zeros = fspecs.precision; + } + bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint; + size += 1 + (pointy ? 
1 : 0) + to_unsigned(num_zeros); + return write_padded(out, specs, size, [&](iterator it) { + if (sign) *it++ = detail::sign(sign); + *it++ = zero; + if (!pointy) return it; + *it++ = decimal_point; + it = detail::fill_n(it, num_zeros, zero); + return write_significand(it, significand, significand_size); + }); +} + +template class fallback_digit_grouping { + public: + constexpr fallback_digit_grouping(locale_ref, bool) {} + + constexpr bool has_separator() const { return false; } + + constexpr int count_separators(int) const { return 0; } + + template + constexpr Out apply(Out out, basic_string_view) const { + return out; + } +}; + +template +FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, + const basic_format_specs& specs, + float_specs fspecs, locale_ref loc) + -> OutputIt { + if (is_constant_evaluated()) { + return do_write_float>(out, f, specs, fspecs, + loc); + } else { + return do_write_float(out, f, specs, fspecs, loc); + } +} + +template constexpr bool isnan(T value) { + return !(value >= value); // std::isnan doesn't support __float128. +} + +template +struct has_isfinite : std::false_type {}; + +template +struct has_isfinite> + : std::true_type {}; + +template ::value&& + has_isfinite::value)> +FMT_CONSTEXPR20 bool isfinite(T value) { + constexpr T inf = T(std::numeric_limits::infinity()); + if (is_constant_evaluated()) + return !detail::isnan(value) && value < inf && value > -inf; + return std::isfinite(value); +} +template ::value)> +FMT_CONSTEXPR bool isfinite(T value) { + T inf = T(std::numeric_limits::infinity()); + // std::isfinite doesn't support __float128. 
+ return !detail::isnan(value) && value < inf && value > -inf; +} + +template ::value)> +FMT_INLINE FMT_CONSTEXPR bool signbit(T value) { + if (is_constant_evaluated()) { +#ifdef __cpp_if_constexpr + if constexpr (std::numeric_limits::is_iec559) { + auto bits = detail::bit_cast(static_cast(value)); + return (bits >> (num_bits() - 1)) != 0; + } +#endif + } + return std::signbit(static_cast(value)); +} + +enum class round_direction { unknown, up, down }; + +// Given the divisor (normally a power of 10), the remainder = v % divisor for +// some number v and the error, returns whether v should be rounded up, down, or +// whether the rounding direction can't be determined due to error. +// error should be less than divisor / 2. +FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor, + uint64_t remainder, + uint64_t error) { + FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. + FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. + FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. + // Round down if (remainder + error) * 2 <= divisor. + if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) + return round_direction::down; + // Round up if (remainder - error) * 2 >= divisor. + if (remainder >= error && + remainder - error >= divisor - (remainder - error)) { + return round_direction::up; + } + return round_direction::unknown; +} + +namespace digits { +enum result { + more, // Generate more digits. + done, // Done generating digits. + error // Digit generation cancelled due to an error. 
+}; +} + +struct gen_digits_handler { + char* buf; + int size; + int precision; + int exp10; + bool fixed; + + FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor, + uint64_t remainder, uint64_t error, + bool integral) { + FMT_ASSERT(remainder < divisor, ""); + buf[size++] = digit; + if (!integral && error >= remainder) return digits::error; + if (size < precision) return digits::more; + if (!integral) { + // Check if error * 2 < divisor with overflow prevention. + // The check is not needed for the integral part because error = 1 + // and divisor > (1 << 32) there. + if (error >= divisor || error >= divisor - error) return digits::error; + } else { + FMT_ASSERT(error == 1 && divisor > 2, ""); + } + auto dir = get_round_direction(divisor, remainder, error); + if (dir != round_direction::up) + return dir == round_direction::down ? digits::done : digits::error; + ++buf[size - 1]; + for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] > '9') { + buf[0] = '1'; + if (fixed) + buf[size++] = '0'; + else + ++exp10; + } + return digits::done; + } +}; + +inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { + // Adjust fixed precision by exponent because it is relative to decimal + // point. + if (exp10 > 0 && precision > max_value() - exp10) + FMT_THROW(format_error("number is too big")); + precision += exp10; +} + +// Generates output using the Grisu digit-gen algorithm. +// error: the size of the region (lower, upper) outside of which numbers +// definitely do not round to value (Delta in Grisu3). +FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error, + int& exp, + gen_digits_handler& handler) + -> digits::result { + const fp one(1ULL << -value.e, value.e); + // The integral part of scaled value (p1 in Grisu) = value / one. 
It cannot be + // zero because it contains a product of two 64-bit numbers with MSB set (due + // to normalization) - 1, shifted right by at most 60 bits. + auto integral = static_cast(value.f >> -one.e); + FMT_ASSERT(integral != 0, ""); + FMT_ASSERT(integral == value.f >> -one.e, ""); + // The fractional part of scaled value (p2 in Grisu) c = value % one. + uint64_t fractional = value.f & (one.f - 1); + exp = count_digits(integral); // kappa in Grisu. + // Non-fixed formats require at least one digit and no precision adjustment. + if (handler.fixed) { + adjust_precision(handler.precision, exp + handler.exp10); + // Check if precision is satisfied just by leading zeros, e.g. + // format("{:.2f}", 0.001) gives "0.00" without generating any digits. + if (handler.precision <= 0) { + if (handler.precision < 0) return digits::done; + // Divide by 10 to prevent overflow. + uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e; + auto dir = get_round_direction(divisor, value.f / 10, error * 10); + if (dir == round_direction::unknown) return digits::error; + handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0'; + return digits::done; + } + } + // Generate digits for the integral part. This can produce up to 10 digits. + do { + uint32_t digit = 0; + auto divmod_integral = [&](uint32_t divisor) { + digit = integral / divisor; + integral %= divisor; + }; + // This optimization by Milo Yip reduces the number of integer divisions by + // one per iteration. 
+ switch (exp) { + case 10: + divmod_integral(1000000000); + break; + case 9: + divmod_integral(100000000); + break; + case 8: + divmod_integral(10000000); + break; + case 7: + divmod_integral(1000000); + break; + case 6: + divmod_integral(100000); + break; + case 5: + divmod_integral(10000); + break; + case 4: + divmod_integral(1000); + break; + case 3: + divmod_integral(100); + break; + case 2: + divmod_integral(10); + break; + case 1: + digit = integral; + integral = 0; + break; + default: + FMT_ASSERT(false, "invalid number of digits"); + } + --exp; + auto remainder = (static_cast(integral) << -one.e) + fractional; + auto result = handler.on_digit(static_cast('0' + digit), + data::power_of_10_64[exp] << -one.e, + remainder, error, true); + if (result != digits::more) return result; + } while (exp > 0); + // Generate digits for the fractional part. + for (;;) { + fractional *= 10; + error *= 10; + char digit = static_cast('0' + (fractional >> -one.e)); + fractional &= one.f - 1; + --exp; + auto result = handler.on_digit(digit, one.f, fractional, error, false); + if (result != digits::more) return result; + } +} + +class bigint { + private: + // A bigint is stored as an array of bigits (big digits), with bigit at index + // 0 being the least significant one. 
+ using bigit = uint32_t; + using double_bigit = uint64_t; + enum { bigits_capacity = 32 }; + basic_memory_buffer bigits_; + int exp_; + + FMT_CONSTEXPR20 bigit operator[](int index) const { + return bigits_[to_unsigned(index)]; + } + FMT_CONSTEXPR20 bigit& operator[](int index) { + return bigits_[to_unsigned(index)]; + } + + static constexpr const int bigit_bits = num_bits(); + + friend struct formatter; + + FMT_CONSTEXPR20 void subtract_bigits(int index, bigit other, bigit& borrow) { + auto result = static_cast((*this)[index]) - other - borrow; + (*this)[index] = static_cast(result); + borrow = static_cast(result >> (bigit_bits * 2 - 1)); + } + + FMT_CONSTEXPR20 void remove_leading_zeros() { + int num_bigits = static_cast(bigits_.size()) - 1; + while (num_bigits > 0 && (*this)[num_bigits] == 0) --num_bigits; + bigits_.resize(to_unsigned(num_bigits + 1)); + } + + // Computes *this -= other assuming aligned bigints and *this >= other. + FMT_CONSTEXPR20 void subtract_aligned(const bigint& other) { + FMT_ASSERT(other.exp_ >= exp_, "unaligned bigints"); + FMT_ASSERT(compare(*this, other) >= 0, ""); + bigit borrow = 0; + int i = other.exp_ - exp_; + for (size_t j = 0, n = other.bigits_.size(); j != n; ++i, ++j) + subtract_bigits(i, other.bigits_[j], borrow); + while (borrow > 0) subtract_bigits(i, 0, borrow); + remove_leading_zeros(); + } + + FMT_CONSTEXPR20 void multiply(uint32_t value) { + const double_bigit wide_value = value; + bigit carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + double_bigit result = bigits_[i] * wide_value + carry; + bigits_[i] = static_cast(result); + carry = static_cast(result >> bigit_bits); + } + if (carry != 0) bigits_.push_back(carry); + } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR20 void multiply(UInt value) { + using half_uint = + conditional_t::value, uint64_t, uint32_t>; + const int shift = num_bits() - bigit_bits; + const UInt lower = static_cast(value); + const UInt upper = value >> 
num_bits(); + UInt carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + UInt result = lower * bigits_[i] + static_cast(carry); + carry = (upper * bigits_[i] << shift) + (result >> bigit_bits) + + (carry >> bigit_bits); + bigits_[i] = static_cast(result); + } + while (carry != 0) { + bigits_.push_back(static_cast(carry)); + carry >>= bigit_bits; + } + } + + template ::value || + std::is_same::value)> + FMT_CONSTEXPR20 void assign(UInt n) { + size_t num_bigits = 0; + do { + bigits_[num_bigits++] = static_cast(n); + n >>= bigit_bits; + } while (n != 0); + bigits_.resize(num_bigits); + exp_ = 0; + } + + public: + FMT_CONSTEXPR20 bigint() : exp_(0) {} + explicit bigint(uint64_t n) { assign(n); } + + bigint(const bigint&) = delete; + void operator=(const bigint&) = delete; + + FMT_CONSTEXPR20 void assign(const bigint& other) { + auto size = other.bigits_.size(); + bigits_.resize(size); + auto data = other.bigits_.data(); + std::copy(data, data + size, make_checked(bigits_.data(), size)); + exp_ = other.exp_; + } + + template FMT_CONSTEXPR20 void operator=(Int n) { + FMT_ASSERT(n > 0, ""); + assign(uint64_or_128_t(n)); + } + + FMT_CONSTEXPR20 int num_bigits() const { + return static_cast(bigits_.size()) + exp_; + } + + FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_ASSERT(shift >= 0, ""); + exp_ += shift / bigit_bits; + shift %= bigit_bits; + if (shift == 0) return *this; + bigit carry = 0; + for (size_t i = 0, n = bigits_.size(); i < n; ++i) { + bigit c = bigits_[i] >> (bigit_bits - shift); + bigits_[i] = (bigits_[i] << shift) + carry; + carry = c; + } + if (carry != 0) bigits_.push_back(carry); + return *this; + } + + template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + FMT_ASSERT(value > 0, ""); + multiply(uint32_or_64_or_128_t(value)); + return *this; + } + + friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); + if 
(num_lhs_bigits != num_rhs_bigits) + return num_lhs_bigits > num_rhs_bigits ? 1 : -1; + int i = static_cast(lhs.bigits_.size()) - 1; + int j = static_cast(rhs.bigits_.size()) - 1; + int end = i - j; + if (end < 0) end = 0; + for (; i >= end; --i, --j) { + bigit lhs_bigit = lhs[i], rhs_bigit = rhs[j]; + if (lhs_bigit != rhs_bigit) return lhs_bigit > rhs_bigit ? 1 : -1; + } + if (i != j) return i > j ? 1 : -1; + return 0; + } + + // Returns compare(lhs1 + lhs2, rhs). + friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, + const bigint& rhs) { + auto minimum = [](int a, int b) { return a < b ? a : b; }; + auto maximum = [](int a, int b) { return a > b ? a : b; }; + int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); + int num_rhs_bigits = rhs.num_bigits(); + if (max_lhs_bigits + 1 < num_rhs_bigits) return -1; + if (max_lhs_bigits > num_rhs_bigits) return 1; + auto get_bigit = [](const bigint& n, int i) -> bigit { + return i >= n.exp_ && i < n.num_bigits() ? n[i - n.exp_] : 0; + }; + double_bigit borrow = 0; + int min_exp = minimum(minimum(lhs1.exp_, lhs2.exp_), rhs.exp_); + for (int i = num_rhs_bigits - 1; i >= min_exp; --i) { + double_bigit sum = + static_cast(get_bigit(lhs1, i)) + get_bigit(lhs2, i); + bigit rhs_bigit = get_bigit(rhs, i); + if (sum > rhs_bigit + borrow) return 1; + borrow = rhs_bigit + borrow - sum; + if (borrow > 1) return -1; + borrow <<= bigit_bits; + } + return borrow != 0 ? -1 : 0; + } + + // Assigns pow(10, exp) to this bigint. + FMT_CONSTEXPR20 void assign_pow10(int exp) { + FMT_ASSERT(exp >= 0, ""); + if (exp == 0) return *this = 1; + // Find the top bit. + int bitmask = 1; + while (exp >= bitmask) bitmask <<= 1; + bitmask >>= 1; + // pow(10, exp) = pow(5, exp) * pow(2, exp). First compute pow(5, exp) by + // repeated squaring and multiplication. 
+ *this = 5; + bitmask >>= 1; + while (bitmask != 0) { + square(); + if ((exp & bitmask) != 0) *this *= 5; + bitmask >>= 1; + } + *this <<= exp; // Multiply by pow(2, exp) by shifting. + } + + FMT_CONSTEXPR20 void square() { + int num_bigits = static_cast(bigits_.size()); + int num_result_bigits = 2 * num_bigits; + basic_memory_buffer n(std::move(bigits_)); + bigits_.resize(to_unsigned(num_result_bigits)); + auto sum = uint128_t(); + for (int bigit_index = 0; bigit_index < num_bigits; ++bigit_index) { + // Compute bigit at position bigit_index of the result by adding + // cross-product terms n[i] * n[j] such that i + j == bigit_index. + for (int i = 0, j = bigit_index; j >= 0; ++i, --j) { + // Most terms are multiplied twice which can be optimized in the future. + sum += static_cast(n[i]) * n[j]; + } + (*this)[bigit_index] = static_cast(sum); + sum >>= num_bits(); // Compute the carry. + } + // Do the same for the top half. + for (int bigit_index = num_bigits; bigit_index < num_result_bigits; + ++bigit_index) { + for (int j = num_bigits - 1, i = bigit_index - j; i < num_bigits;) + sum += static_cast(n[i++]) * n[j--]; + (*this)[bigit_index] = static_cast(sum); + sum >>= num_bits(); + } + remove_leading_zeros(); + exp_ *= 2; + } + + // If this bigint has a bigger exponent than other, adds trailing zero to make + // exponents equal. This simplifies some operations such as subtraction. + FMT_CONSTEXPR20 void align(const bigint& other) { + int exp_difference = exp_ - other.exp_; + if (exp_difference <= 0) return; + int num_bigits = static_cast(bigits_.size()); + bigits_.resize(to_unsigned(num_bigits + exp_difference)); + for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) + bigits_[j] = bigits_[i]; + std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + exp_ -= exp_difference; + } + + // Divides this bignum by divisor, assigning the remainder to this and + // returning the quotient. 
+ FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_ASSERT(this != &divisor, ""); + if (compare(*this, divisor) < 0) return 0; + FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); + align(divisor); + int quotient = 0; + do { + subtract_aligned(divisor); + ++quotient; + } while (compare(*this, divisor) >= 0); + return quotient; + } +}; + +// format_dragon flags. +enum dragon { + predecessor_closer = 1, + fixup = 2, // Run fixup to correct exp10 which can be off by one. + fixed = 4, +}; + +// Formats a floating-point number using a variation of the Fixed-Precision +// Positive Floating-Point Printout ((FPP)^2) algorithm by Steele & White: +// https://fmt.dev/papers/p372-steele.pdf. +FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, + unsigned flags, int num_digits, + buffer& buf, int& exp10) { + bigint numerator; // 2 * R in (FPP)^2. + bigint denominator; // 2 * S in (FPP)^2. + // lower and upper are differences between value and corresponding boundaries. + bigint lower; // (M^- in (FPP)^2). + bigint upper_store; // upper's value if different from lower. + bigint* upper = nullptr; // (M^+ in (FPP)^2). + // Shift numerator and denominator by an extra bit or two (if lower boundary + // is closer) to make lower and upper integers. This eliminates multiplication + // by 2 during later computations. + bool is_predecessor_closer = (flags & dragon::predecessor_closer) != 0; + int shift = is_predecessor_closer ? 
2 : 1; + if (value.e >= 0) { + numerator = value.f; + numerator <<= value.e + shift; + lower = 1; + lower <<= value.e; + if (is_predecessor_closer) { + upper_store = 1; + upper_store <<= value.e + 1; + upper = &upper_store; + } + denominator.assign_pow10(exp10); + denominator <<= shift; + } else if (exp10 < 0) { + numerator.assign_pow10(-exp10); + lower.assign(numerator); + if (is_predecessor_closer) { + upper_store.assign(numerator); + upper_store <<= 1; + upper = &upper_store; + } + numerator *= value.f; + numerator <<= shift; + denominator = 1; + denominator <<= shift - value.e; + } else { + numerator = value.f; + numerator <<= shift; + denominator.assign_pow10(exp10); + denominator <<= shift - value.e; + lower = 1; + if (is_predecessor_closer) { + upper_store = 1ULL << 1; + upper = &upper_store; + } + } + int even = static_cast((value.f & 1) == 0); + if (!upper) upper = &lower; + if ((flags & dragon::fixup) != 0) { + if (add_compare(numerator, *upper, denominator) + even <= 0) { + --exp10; + numerator *= 10; + if (num_digits < 0) { + lower *= 10; + if (upper != &lower) *upper *= 10; + } + } + if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1); + } + // Invariant: value == (numerator / denominator) * pow(10, exp10). + if (num_digits < 0) { + // Generate the shortest representation. + num_digits = 0; + char* data = buf.data(); + for (;;) { + int digit = numerator.divmod_assign(denominator); + bool low = compare(numerator, lower) - even < 0; // numerator <[=] lower. + // numerator + upper >[=] pow10: + bool high = add_compare(numerator, *upper, denominator) + even > 0; + data[num_digits++] = static_cast('0' + digit); + if (low || high) { + if (!low) { + ++data[num_digits - 1]; + } else if (high) { + int result = add_compare(numerator, numerator, denominator); + // Round half to even. 
+ if (result > 0 || (result == 0 && (digit % 2) != 0)) + ++data[num_digits - 1]; + } + buf.try_resize(to_unsigned(num_digits)); + exp10 -= num_digits - 1; + return; + } + numerator *= 10; + lower *= 10; + if (upper != &lower) *upper *= 10; + } + } + // Generate the given number of digits. + exp10 -= num_digits - 1; + if (num_digits == 0) { + denominator *= 10; + auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + buf.push_back(digit); + return; + } + buf.try_resize(to_unsigned(num_digits)); + for (int i = 0; i < num_digits - 1; ++i) { + int digit = numerator.divmod_assign(denominator); + buf[i] = static_cast('0' + digit); + numerator *= 10; + } + int digit = numerator.divmod_assign(denominator); + auto result = add_compare(numerator, numerator, denominator); + if (result > 0 || (result == 0 && (digit % 2) != 0)) { + if (digit == 9) { + const auto overflow = '0' + 10; + buf[num_digits - 1] = overflow; + // Propagate the carry. + for (int i = num_digits - 1; i > 0 && buf[i] == overflow; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] == overflow) { + buf[0] = '1'; + ++exp10; + } + return; + } + ++digit; + } + buf[num_digits - 1] = static_cast('0' + digit); +} + +template +FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, + buffer& buf) -> int { + // float is passed as double to reduce the number of instantiations. + static_assert(!std::is_same::value, ""); + FMT_ASSERT(value >= 0, "value is negative"); + auto converted_value = convert_float(value); + + const bool fixed = specs.format == float_format::fixed; + if (value <= 0) { // <= instead of == to silence a warning. 
+ if (precision <= 0 || !fixed) { + buf.push_back('0'); + return 0; + } + buf.try_resize(to_unsigned(precision)); + fill_n(buf.data(), precision, '0'); + return -precision; + } + + int exp = 0; + bool use_dragon = true; + unsigned dragon_flags = 0; + if (!is_fast_float()) { + const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10) + using info = dragonbox::float_info; + const auto f = basic_fp(converted_value); + // Compute exp, an approximate power of 10, such that + // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1). + // This is based on log10(value) == log2(value) / log2(10) and approximation + // of log2(value) by e + num_fraction_bits idea from double-conversion. + exp = static_cast( + std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10)); + dragon_flags = dragon::fixup; + } else if (!is_constant_evaluated() && precision < 0) { + // Use Dragonbox for the shortest format. + if (specs.binary32) { + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } + auto dec = dragonbox::to_decimal(static_cast(value)); + write(buffer_appender(buf), dec.significand); + return dec.exponent; + } else { + // Use Grisu + Dragon4 for the given precision: + // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. + const int min_exp = -60; // alpha in Grisu. + int cached_exp10 = 0; // K in Grisu. 
+ fp normalized = normalize(fp(converted_value)); + const auto cached_pow = get_cached_power( + min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); + normalized = normalized * cached_pow; + gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; + if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error && + !is_constant_evaluated()) { + exp += handler.exp10; + buf.try_resize(to_unsigned(handler.size)); + use_dragon = false; + } else { + exp += handler.size - cached_exp10 - 1; + precision = handler.precision; + } + } + if (use_dragon) { + auto f = basic_fp(); + bool is_predecessor_closer = specs.binary32 + ? f.assign(static_cast(value)) + : f.assign(converted_value); + if (is_predecessor_closer) dragon_flags |= dragon::predecessor_closer; + if (fixed) dragon_flags |= dragon::fixed; + // Limit precision to the maximum possible number of significant digits in + // an IEEE754 double because we don't need to generate zeros. + const int max_double_digits = 767; + if (precision > max_double_digits) precision = max_double_digits; + format_dragon(f, dragon_flags, precision, buf, exp); + } + if (!fixed && !specs.showpoint) { + // Remove trailing zeros. + auto num_digits = buf.size(); + while (num_digits > 0 && buf[num_digits - 1] == '0') { + --num_digits; + ++exp; + } + buf.try_resize(num_digits); + } + return exp; +} +template +FMT_CONSTEXPR20 auto write_float(OutputIt out, T value, + basic_format_specs specs, locale_ref loc) + -> OutputIt { + float_specs fspecs = parse_float_type_spec(specs); + fspecs.sign = specs.sign; + if (detail::signbit(value)) { // value < 0 is false for NaN so use signbit. 
+ fspecs.sign = sign::minus; + value = -value; + } else if (fspecs.sign == sign::minus) { + fspecs.sign = sign::none; + } + + if (!detail::isfinite(value)) + return write_nonfinite(out, detail::isnan(value), specs, fspecs); + + if (specs.align == align::numeric && fspecs.sign) { + auto it = reserve(out, 1); + *it++ = detail::sign(fspecs.sign); + out = base_iterator(out, it); + fspecs.sign = sign::none; + if (specs.width != 0) --specs.width; + } + + memory_buffer buffer; + if (fspecs.format == float_format::hex) { + if (fspecs.sign) buffer.push_back(detail::sign(fspecs.sign)); + snprintf_float(convert_float(value), specs.precision, fspecs, buffer); + return write_bytes(out, {buffer.data(), buffer.size()}, + specs); + } + int precision = specs.precision >= 0 || specs.type == presentation_type::none + ? specs.precision + : 6; + if (fspecs.format == float_format::exp) { + if (precision == max_value()) + throw_format_error("number is too big"); + else + ++precision; + } else if (fspecs.format != float_format::fixed && precision == 0) { + precision = 1; + } + if (const_check(std::is_same())) fspecs.binary32 = true; + int exp = format_float(convert_float(value), precision, fspecs, buffer); + fspecs.precision = precision; + auto f = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; + return write_float(out, f, specs, fspecs, loc); +} + +template ::value)> +FMT_CONSTEXPR20 auto write(OutputIt out, T value, + basic_format_specs specs, locale_ref loc = {}) + -> OutputIt { + if (const_check(!is_supported_floating_point(value))) return out; + return specs.localized && write_loc(out, value, specs, loc) + ? 
out + : write_float(out, value, specs, loc); +} + +template ::value)> +FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt { + if (is_constant_evaluated()) + return write(out, value, basic_format_specs()); + if (const_check(!is_supported_floating_point(value))) return out; + + auto fspecs = float_specs(); + if (detail::signbit(value)) { + fspecs.sign = sign::minus; + value = -value; + } + + constexpr auto specs = basic_format_specs(); + using floaty = conditional_t::value, double, T>; + using uint = typename dragonbox::float_info::carrier_uint; + uint mask = exponent_mask(); + if ((bit_cast(value) & mask) == mask) + return write_nonfinite(out, std::isnan(value), specs, fspecs); + + auto dec = dragonbox::to_decimal(static_cast(value)); + return write_float(out, dec, specs, fspecs, {}); +} + +template ::value && + !is_fast_float::value)> +inline auto write(OutputIt out, T value) -> OutputIt { + return write(out, value, basic_format_specs()); +} + +template +auto write(OutputIt out, monostate, basic_format_specs = {}, + locale_ref = {}) -> OutputIt { + FMT_ASSERT(false, ""); + return out; +} + +template +FMT_CONSTEXPR auto write(OutputIt out, basic_string_view value) + -> OutputIt { + auto it = reserve(out, value.size()); + it = copy_str_noinline(value.begin(), value.end(), it); + return base_iterator(out, it); +} + +template ::value)> +constexpr auto write(OutputIt out, const T& value) -> OutputIt { + return write(out, to_string_view(value)); +} + +// FMT_ENABLE_IF() condition separated to workaround an MSVC bug. 
+template < + typename Char, typename OutputIt, typename T, + bool check = + std::is_enum::value && !std::is_same::value && + mapped_type_constant>::value != + type::custom_type, + FMT_ENABLE_IF(check)> +FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { /* enum overload, enabled through the separately computed check flag: casts value with static_cast and writes the cast result */ + return write(out, static_cast>(value)); +} + /* bool overload: when specs.type is neither none nor string, the value is written as the integer 1 or 0 with the given specs; otherwise the text "true" or "false" is written via write_bytes. */ +template ::value)> +FMT_CONSTEXPR auto write(OutputIt out, T value, + const basic_format_specs& specs = {}, + locale_ref = {}) -> OutputIt { + return specs.type != presentation_type::none && + specs.type != presentation_type::string + ? write(out, value ? 1 : 0, specs, {}) + : write_bytes(out, value ? "true" : "false", specs); +} + /* Single-character overload: reserves one slot, stores value in it, and returns the iterator rebased through base_iterator. */ +template +FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { + auto it = reserve(out, 1); + *it++ = value; + return base_iterator(out, it); +} + /* C-string overload: a null pointer is reported through throw_format_error("string pointer is null"); a non-null pointer is written as a basic_string_view. */ +template +FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) + -> OutputIt { + if (!value) { + throw_format_error("string pointer is null"); + } else { + out = write(out, basic_string_view(value)); + } + return out; +} + /* Pointer overload: checks specs.type with check_pointer_type_spec, then passes the bit_cast pointer value and the address of specs to write_ptr. */ +template ::value)> +auto write(OutputIt out, const T* value, + const basic_format_specs& specs = {}, locale_ref = {}) + -> OutputIt { + check_pointer_type_spec(specs.type, error_handler()); + return write_ptr(out, bit_cast(value), &specs); +} + +// A write overload that handles implicit conversions.
+template > +FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t< + std::is_class::value && !is_string::value && + !is_floating_point::value && !std::is_same::value && + !std::is_same().map(value))>::value, + OutputIt> { /* maps value with arg_mapper().map and writes the mapped result */ + return write(out, arg_mapper().map(value)); +} + /* Overload enabled when the mapped type constant equals type::custom_type: picks formatter_type through conditional_t (the Context formatter_type or fallback_formatter), builds a Context over out with two empty-brace arguments, and returns what formatter_type().format(value, ctx) returns. */ +template > +FMT_CONSTEXPR auto write(OutputIt out, const T& value) + -> enable_if_t::value == type::custom_type, + OutputIt> { + using formatter_type = + conditional_t::value, + typename Context::template formatter_type, + fallback_formatter>; + auto ctx = Context(out, {}, {}); + return formatter_type().format(value, ctx); +} + +// An argument visitor that formats the argument and writes it via the output +// iterator. It's a class and not a generic lambda for compatibility with C++11. +template struct default_arg_formatter { + using iterator = buffer_appender; + using context = buffer_context; + + iterator out; + basic_format_args args; + locale_ref loc; + + template auto operator()(T value) -> iterator { /* generic arguments: written with the spec-less write */ + return write(out, value); + } + auto operator()(typename basic_format_arg::handle h) -> iterator { /* handle arguments: formatted by h.format with a parse context built from {} and a format context over out, args and loc; returns format_ctx.out() */ + basic_format_parse_context parse_ctx({}); + context format_ctx(out, args, loc); + h.format(parse_ctx, format_ctx); + return format_ctx.out(); + } +}; + /* Visitor that writes an argument with the stored specs and locale through detail::write; its handle overload writes nothing and returns out. */ +template struct arg_formatter { + using iterator = buffer_appender; + using context = buffer_context; + + iterator out; + const basic_format_specs& specs; + locale_ref locale; + + template + FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator { + return detail::write(out, value, specs, locale); + } + auto operator()(typename basic_format_arg::handle) -> iterator { + // User-defined types are handled separately because they require access + // to the parse context.
+ return out; + } +}; + /* Visitor for handle arguments only: the handle overload calls h.format(parse_ctx, ctx); the generic overload has an empty body. */ +template struct custom_formatter { + basic_format_parse_context& parse_ctx; + buffer_context& ctx; + + void operator()( + typename basic_format_arg>::handle h) const { + h.format(parse_ctx, ctx); + } + template void operator()(T) const {} +}; + /* Visitor that turns an argument into a width returned as unsigned long long. One overload reports "negative width" through the handler when is_negative(value) and then returns the cast value; the other reports "width is not integer" and returns 0. NOTE(review): the enable-if conditions selecting between the two are truncated in this text; presumably integer versus non-integer T, confirm against upstream. */ +template class width_checker { + public: + explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} + + template ::value)> + FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { + if (is_negative(value)) handler_.on_error("negative width"); + return static_cast(value); + } + + template ::value)> + FMT_CONSTEXPR auto operator()(T) -> unsigned long long { + handler_.on_error("width is not integer"); + return 0; + } + + private: + ErrorHandler& handler_; +}; + /* Visitor that turns an argument into a precision returned as unsigned long long. One overload reports "negative precision" through the handler when is_negative(value) and then returns the cast value; the other reports "precision is not integer" and returns 0. NOTE(review): the enable-if conditions selecting between the two are truncated in this text; presumably integer versus non-integer T, confirm against upstream. */ +template class precision_checker { + public: + explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} + + template ::value)> + FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { + if (is_negative(value)) handler_.on_error("negative precision"); + return static_cast(value); + } + + template ::value)> + FMT_CONSTEXPR auto operator()(T) -> unsigned long long { + handler_.on_error("precision is not integer"); + return 0; + } + + private: + ErrorHandler& handler_; +}; + +template